common/
filegen.rs

1use anyhow::{anyhow, Context};
2use async_recursion::async_recursion;
3use tracing::instrument;
4
5use crate::progress;
6
7/// Error type for filegen operations that preserves operation summary even on failure.
8///
9/// # Logging Convention
10/// When logging this error, use `{:#}` or `{:?}` format to preserve the error chain:
11/// ```ignore
12/// tracing::error!("operation failed: {:#}", &error); // ✅ Shows full chain
13/// tracing::error!("operation failed: {:?}", &error); // ✅ Shows full chain
14/// ```
15/// The Display implementation also shows the full chain, but workspace linting enforces `{:#}`
16/// for consistency.
17#[derive(Debug, thiserror::Error)]
18#[error("{source:#}")]
19pub struct Error {
20    #[source]
21    pub source: anyhow::Error,
22    pub summary: Summary,
23}
24
25impl Error {
26    #[must_use]
27    pub fn new(source: anyhow::Error, summary: Summary) -> Self {
28        Error { source, summary }
29    }
30}
31
32#[derive(Copy, Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
33pub struct Summary {
34    pub files_created: usize,
35    pub directories_created: usize,
36    pub bytes_written: u64,
37}
38
39impl std::ops::Add for Summary {
40    type Output = Self;
41    fn add(self, other: Self) -> Self {
42        Self {
43            files_created: self.files_created + other.files_created,
44            directories_created: self.directories_created + other.directories_created,
45            bytes_written: self.bytes_written + other.bytes_written,
46        }
47    }
48}
49
50impl std::fmt::Display for Summary {
51    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
52        write!(
53            f,
54            "files created: {}\n\
55            directories created: {}\n\
56            bytes written: {}",
57            self.files_created,
58            self.directories_created,
59            bytesize::ByteSize(self.bytes_written)
60        )
61    }
62}
63
/// Parameters describing the directory tree and files that `filegen` should produce.
#[derive(Debug, Clone)]
pub struct FileGenConfig {
    /// Directory under which everything is generated.
    pub root: std::path::PathBuf,
    /// Number of subdirectories to create at each nesting level; the first entry applies to
    /// `root`, the next one to each of its subdirectories, and so on.
    pub dirwidth: Vec<usize>,
    /// Number of files to generate in every directory that receives files.
    pub numfiles: usize,
    /// Size of each generated file, in bytes.
    pub filesize: usize,
    /// Size of the buffer used when writing a file, in bytes.
    pub writebuf: usize,
    /// Chunk size handed to the I/O throttle when requesting tokens for a file.
    pub chunk_size: u64,
    /// When `true`, files are generated only in directories that have no further
    /// subdirectory levels below them; otherwise every directory receives files.
    pub leaf_files: bool,
}
82
83impl FileGenConfig {
84    /// Create a new file generation configuration
85    pub fn new(
86        root: impl Into<std::path::PathBuf>,
87        dirwidth: Vec<usize>,
88        numfiles: usize,
89        filesize: usize,
90    ) -> Self {
91        Self {
92            root: root.into(),
93            dirwidth,
94            numfiles,
95            filesize,
96            writebuf: 1024 * 1024, // 1MB default
97            chunk_size: 0,
98            leaf_files: false,
99        }
100    }
101}
102
103#[instrument(skip(prog_track))]
104pub async fn write_file(
105    prog_track: &'static progress::Progress,
106    path: std::path::PathBuf,
107    mut filesize: usize,
108    bufsize: usize,
109    chunk_size: u64,
110) -> Result<Summary, Error> {
111    use rand::Rng;
112    use tokio::io::AsyncWriteExt;
113    let _permit = throttle::open_file_permit().await;
114    throttle::get_file_iops_tokens(chunk_size, filesize as u64).await;
115    let _ops_guard = prog_track.ops.guard();
116    let original_filesize = filesize;
117    let mut bytes = vec![0u8; bufsize];
118    let mut file = tokio::fs::OpenOptions::new()
119        .write(true)
120        .create(true)
121        .truncate(false)
122        .open(&path)
123        .await
124        .with_context(|| format!("Error opening {:?}", &path))
125        .map_err(|err| Error::new(err, Default::default()))?;
126    while filesize > 0 {
127        {
128            // make sure rng falls out of scope before await
129            let mut rng = rand::thread_rng();
130            rng.fill(&mut bytes[..]);
131        }
132        let writesize = std::cmp::min(filesize, bufsize);
133        file.write_all(&bytes[..writesize])
134            .await
135            .with_context(|| format!("Error writing to {:?}", &path))
136            .map_err(|err| Error::new(err, Default::default()))?;
137        filesize -= writesize;
138    }
139    prog_track.files_copied.inc();
140    prog_track.bytes_copied.add(original_filesize as u64);
141    Ok(Summary {
142        files_created: 1,
143        bytes_written: original_filesize as u64,
144        ..Default::default()
145    })
146}
147
148#[async_recursion]
149#[instrument(skip(prog_track))]
150pub async fn filegen(
151    prog_track: &'static progress::Progress,
152    config: &FileGenConfig,
153) -> Result<Summary, Error> {
154    let FileGenConfig {
155        root,
156        dirwidth,
157        numfiles,
158        filesize,
159        writebuf,
160        chunk_size,
161        leaf_files,
162    } = config;
163    let numdirs = *dirwidth.first().unwrap_or(&0);
164    let mut join_set = tokio::task::JoinSet::new();
165    // generate directories and recurse into them
166    for i in 0..numdirs {
167        let path = root.join(format!("dir{i}"));
168        let next_dirwidth = dirwidth[1..].to_vec();
169        let recurse_config = FileGenConfig {
170            root: path.clone(),
171            dirwidth: next_dirwidth,
172            numfiles: *numfiles,
173            filesize: *filesize,
174            writebuf: *writebuf,
175            chunk_size: *chunk_size,
176            leaf_files: *leaf_files,
177        };
178        let recurse = || async move {
179            tokio::fs::create_dir(&path)
180                .await
181                .with_context(|| format!("Error creating directory {:?}", &path))
182                .map_err(|err| Error::new(err, Default::default()))?;
183            prog_track.directories_created.inc();
184            let dir_summary = Summary {
185                directories_created: 1,
186                ..Default::default()
187            };
188            let recurse_summary = filegen(prog_track, &recurse_config).await?;
189            Ok(dir_summary + recurse_summary)
190        };
191        join_set.spawn(recurse());
192    }
193    // generate files (only if we're not in leaf_files mode, or if we are a leaf directory)
194    // a directory is a leaf when dirwidth is empty (no more subdirectories to create)
195    let is_leaf = dirwidth.is_empty();
196    let should_generate_files = !leaf_files || is_leaf;
197    if should_generate_files {
198        for i in 0..*numfiles {
199            // it's better to await the token here so that we throttle how many tasks we spawn. the
200            // ops-throttle will never cause a deadlock (unlike max-open-files limit) so it's safe to
201            // do here.
202            throttle::get_ops_token().await;
203            let path = root.join(format!("file{i}"));
204            join_set.spawn(write_file(
205                prog_track,
206                path,
207                *filesize,
208                *writebuf,
209                *chunk_size,
210            ));
211        }
212    }
213    let mut success = true;
214    let mut last_error: Option<anyhow::Error> = None;
215    let mut filegen_summary = Summary::default();
216    while let Some(res) = join_set.join_next().await {
217        match res.map_err(|err| Error::new(err.into(), Default::default()))? {
218            Ok(summary) => filegen_summary = filegen_summary + summary,
219            Err(error) => {
220                tracing::error!("filegen: {:?} failed with: {:#}", root, &error);
221                filegen_summary = filegen_summary + error.summary;
222                if last_error.is_none() {
223                    last_error = Some(error.source);
224                }
225                success = false;
226            }
227        }
228    }
229    if !success {
230        let error = if let Some(error) = last_error {
231            error.context(format!("filegen: {:?} failed!", &root))
232        } else {
233            anyhow!("filegen: {:?} failed!", &root)
234        };
235        return Err(Error::new(error, filegen_summary));
236    }
237    Ok(filegen_summary)
238}
239
#[cfg(test)]
mod tests {
    use super::*;
    use crate::testutils;
    use std::os::unix::fs::PermissionsExt;
    use tracing_test::traced_test;

    lazy_static! {
        // `filegen` requires a `&'static Progress`, so all tests share one instance
        static ref PROGRESS: progress::Progress = progress::Progress::new();
    }

    /// One directory level with files in the root and in both subdirectories.
    #[tokio::test]
    #[traced_test]
    async fn test_basic_filegen() -> Result<(), anyhow::Error> {
        let tmp_dir = testutils::create_temp_dir().await?;
        let test_path = tmp_dir.as_path();
        // generate 2 subdirectories with 3 files per directory (including root)
        let config = FileGenConfig {
            root: test_path.to_path_buf(),
            dirwidth: vec![2],
            numfiles: 3,
            filesize: 100,
            writebuf: 50,
            chunk_size: 0,
            leaf_files: false,
        };
        let summary = filegen(&PROGRESS, &config).await?;
        // verify summary
        // files: 3 (in root) + 3 (in dir0) + 3 (in dir1) = 9 files
        // directories: 2 (dir0, dir1)
        // bytes: 100 bytes × 9 files = 900 bytes
        assert_eq!(summary.files_created, 9);
        assert_eq!(summary.directories_created, 2);
        assert_eq!(summary.bytes_written, 900);
        // verify files were actually created
        assert!(test_path.join("file0").exists()); // root level files
        for dir in ["dir0", "dir1"] {
            for file in ["file0", "file1", "file2"] {
                assert!(test_path.join(dir).join(file).exists());
            }
        }
        // verify file sizes
        let metadata = tokio::fs::metadata(test_path.join("dir0").join("file0")).await?;
        assert_eq!(metadata.len(), 100);
        // cleanup
        tokio::fs::remove_dir_all(test_path).await?;
        Ok(())
    }

    /// Two directory levels with files at every level.
    #[tokio::test]
    #[traced_test]
    async fn test_nested_filegen() -> Result<(), anyhow::Error> {
        let tmp_dir = testutils::create_temp_dir().await?;
        let test_path = tmp_dir.as_path();
        // generate nested structure: 2 top-level dirs, each with 3 subdirs, 4 files per dir, 50 bytes each
        let config = FileGenConfig {
            root: test_path.to_path_buf(),
            dirwidth: vec![2, 3],
            numfiles: 4,
            filesize: 50,
            writebuf: 25,
            chunk_size: 0,
            leaf_files: false,
        };
        let summary = filegen(&PROGRESS, &config).await?;
        // calculate expected values:
        // directories: 2 top-level + (2 × 3) subdirs = 8 total
        // files: 4 (in root) + 4×2 (in dir0, dir1) + 4×2×3 (in all leaf dirs) = 4 + 8 + 24 = 36 files
        // bytes: 50 bytes × 36 files = 1800 bytes
        assert_eq!(summary.files_created, 36);
        assert_eq!(summary.directories_created, 8);
        assert_eq!(summary.bytes_written, 1800);
        // spot check some files exist
        assert!(test_path.join("file0").exists()); // root files
        assert!(test_path.join("dir0").join("file0").exists()); // top-level dir files
        assert!(test_path.join("dir0").join("dir0").join("file0").exists());
        assert!(test_path.join("dir0").join("dir2").join("file3").exists());
        assert!(test_path.join("dir1").join("dir1").join("file2").exists());
        // cleanup
        tokio::fs::remove_dir_all(test_path).await?;
        Ok(())
    }

    /// Three directory levels to exercise deeper recursion.
    #[tokio::test]
    #[traced_test]
    async fn test_deeply_nested_filegen() -> Result<(), anyhow::Error> {
        let tmp_dir = testutils::create_temp_dir().await?;
        let test_path = tmp_dir.as_path();
        // generate 3 levels: 2,2,2 with 2 files each, 10 bytes per file
        let config = FileGenConfig {
            root: test_path.to_path_buf(),
            dirwidth: vec![2, 2, 2],
            numfiles: 2,
            filesize: 10,
            writebuf: 10,
            chunk_size: 0,
            leaf_files: false,
        };
        let summary = filegen(&PROGRESS, &config).await?;
        // directories: 2 + (2×2) + (2×2×2) = 2 + 4 + 8 = 14 dirs
        // files: 2 (root) + 2×2 (level 1) + 2×2×2 (level 2) + 2×2×2×2 (level 3) = 2 + 4 + 8 + 16 = 30 files
        // bytes: 10 bytes × 30 files = 300 bytes
        assert_eq!(summary.files_created, 30);
        assert_eq!(summary.directories_created, 14);
        assert_eq!(summary.bytes_written, 300);
        // verify deep nesting works
        assert!(test_path.join("file0").exists()); // root files
        let deepest_first = test_path.join("dir0").join("dir0").join("dir0");
        assert!(deepest_first.join("file0").exists());
        let deepest_last = test_path.join("dir1").join("dir1").join("dir1");
        assert!(deepest_last.join("file1").exists());
        // cleanup
        tokio::fs::remove_dir_all(test_path).await?;
        Ok(())
    }

    /// No directory levels at all: files are written straight into the root.
    #[tokio::test]
    #[traced_test]
    async fn test_single_file() -> Result<(), anyhow::Error> {
        let tmp_dir = testutils::create_temp_dir().await?;
        let test_path = tmp_dir.as_path();
        // generate just files, no directories
        let config = FileGenConfig {
            root: test_path.to_path_buf(),
            dirwidth: vec![],
            numfiles: 5,
            filesize: 200,
            writebuf: 100,
            chunk_size: 0,
            leaf_files: false,
        };
        let summary = filegen(&PROGRESS, &config).await?;
        assert_eq!(summary.files_created, 5);
        assert_eq!(summary.directories_created, 0);
        assert_eq!(summary.bytes_written, 1000); // 200 × 5
        for i in 0..5 {
            // verify files
            let file_path = test_path.join(format!("file{i}"));
            assert!(file_path.exists());
            let metadata = tokio::fs::metadata(&file_path).await?;
            assert_eq!(metadata.len(), 200);
        }
        // cleanup
        tokio::fs::remove_dir_all(test_path).await?;
        Ok(())
    }

    /// `numfiles: 0` must still create the directory structure.
    #[tokio::test]
    #[traced_test]
    async fn test_zero_files() -> Result<(), anyhow::Error> {
        let tmp_dir = testutils::create_temp_dir().await?;
        let test_path = tmp_dir.as_path();
        // generate only directories, no files
        let config = FileGenConfig {
            root: test_path.to_path_buf(),
            dirwidth: vec![3, 2],
            numfiles: 0,
            filesize: 100,
            writebuf: 50,
            chunk_size: 0,
            leaf_files: false,
        };
        let summary = filegen(&PROGRESS, &config).await?;
        // directories: 3 + (3×2) = 9 dirs
        assert_eq!(summary.files_created, 0);
        assert_eq!(summary.directories_created, 9);
        assert_eq!(summary.bytes_written, 0);
        // verify directories exist but no files
        assert!(test_path.join("dir0").join("dir0").exists());
        assert!(test_path.join("dir2").join("dir1").exists());
        assert!(!test_path.join("dir0").join("file0").exists());
        // cleanup
        tokio::fs::remove_dir_all(test_path).await?;
        Ok(())
    }

    /// With `leaf_files: true` only the deepest directories receive files.
    #[tokio::test]
    #[traced_test]
    async fn test_leaf_files_only() -> Result<(), anyhow::Error> {
        let tmp_dir = testutils::create_temp_dir().await?;
        let test_path = tmp_dir.as_path();
        // generate with leaf_files=true, meaning files only in deepest directories
        let config = FileGenConfig {
            root: test_path.to_path_buf(),
            dirwidth: vec![2, 3],
            numfiles: 4,
            filesize: 50,
            writebuf: 25,
            chunk_size: 0,
            leaf_files: true,
        };
        let summary = filegen(&PROGRESS, &config).await?;
        // directories: 2 top-level + (2 × 3) subdirs = 8 total
        // files: ONLY in leaf dirs (6 leaf dirs) × 4 files each = 24 files
        // bytes: 50 bytes × 24 files = 1200 bytes
        assert_eq!(summary.files_created, 24);
        assert_eq!(summary.directories_created, 8);
        assert_eq!(summary.bytes_written, 1200);
        // verify NO files in root or intermediate directories
        assert!(!test_path.join("file0").exists()); // no root files
        assert!(!test_path.join("dir0").join("file0").exists()); // no intermediate files
        assert!(!test_path.join("dir1").join("file0").exists());
        // verify files ONLY in leaf directories
        assert!(test_path.join("dir0").join("dir0").join("file0").exists());
        assert!(test_path.join("dir0").join("dir0").join("file3").exists());
        assert!(test_path.join("dir0").join("dir2").join("file0").exists());
        assert!(test_path.join("dir1").join("dir1").join("file0").exists());
        // cleanup
        tokio::fs::remove_dir_all(test_path).await?;
        Ok(())
    }

    /// A failure caused by a read-only directory must surface the underlying OS error text.
    #[tokio::test]
    #[traced_test]
    async fn test_permission_error_includes_root_cause() -> Result<(), anyhow::Error> {
        let tmp_dir = testutils::create_temp_dir().await?;
        let root = tmp_dir.join("readonly");
        tokio::fs::create_dir(&root).await?;
        tokio::fs::set_permissions(&root, std::fs::Permissions::from_mode(0o555)).await?;

        let config = FileGenConfig {
            root: root.clone(),
            dirwidth: Vec::new(),
            numfiles: 1,
            filesize: 10,
            writebuf: 10,
            chunk_size: 0,
            leaf_files: false,
        };
        let result = filegen(&PROGRESS, &config).await;

        // restore permissions to allow cleanup
        tokio::fs::set_permissions(&root, std::fs::Permissions::from_mode(0o755)).await?;

        assert!(
            result.is_err(),
            "filegen inside read-only directory should fail"
        );
        let err = result.unwrap_err();
        let err_msg = format!("{:#}", err.source);
        assert!(
            err_msg.to_lowercase().contains("permission denied") || err_msg.contains("EACCES"),
            "Error message must include permission denied text. Got: {}",
            err_msg
        );
        // cleanup
        tokio::fs::remove_dir_all(tmp_dir.as_path()).await?;
        Ok(())
    }
}