batch_mode_batch_workspace/
workspace_clone.rs

1// ---------------- [ File: batch-mode-batch-workspace/src/workspace_clone.rs ]
2crate::ix!();
3
/// Fallible deep copy.
///
/// Implementors return an independent copy of `self`, or an implementor-chosen error when the
/// copy cannot be produced. The `BatchWorkspace` implementation in this file copies the
/// workspace's directories into a freshly created temporary directory.
pub trait DeepClone
where
    Self: Sized,
{
    /// Error type reported when the deep copy fails.
    type Error;

    /// Returns a deep copy of `self`, or `Self::Error` if the copy could not be made.
    fn deep_clone(&self) -> Result<Self, Self::Error>;
}
11
12impl DeepClone for BatchWorkspace {
13
14    type Error = BatchWorkspaceError;
15
16    fn deep_clone(&self) -> Result<Self,Self::Error> {
17        Ok(self.clone_as_fresh_temp()?)
18    }
19}
20
/// Clone a value into a brand-new temporary environment.
///
/// For `BatchWorkspace` (the implementor in this file) this means:
///
/// - A new `TempDir` is always created for the clone, whether or not the original workspace
///   was itself backed by a temporary directory.
/// - The standard subdirectories (`workdir`, `logs`, `done`, `failed-items`, `target`,
///   `failed-json-repairs`) are created under the new root, and the contents of the original's
///   corresponding directories are copied over when those directories exist.
/// - The resulting clone is therefore always ephemeral and owns its own `TempDir`.
///
/// # Important Caveats
/// 1. **Performance**: every file is copied, which can be expensive for large workspaces.
/// 2. **Divergence**: after cloning, the original and the clone no longer share changes.
/// 3. **No partial copy**: everything is copied; selective copying needs custom logic.
/// 4. **Error handling**: failures surface as plain `io::Error`s.
pub trait CloneAsFreshTemporary {
    /// Returns a copy of `self` that lives in a newly created temporary location, or the I/O
    /// error that prevented the copy.
    fn clone_as_fresh_temp(&self) -> Result<Self, io::Error>
    where
        Self: Sized;
}
46
47/// The robust approach for "deep cloning" a workspace into a new `TempDir`.
48impl CloneAsFreshTemporary for BatchWorkspace {
49    fn clone_as_fresh_temp(&self) -> io::Result<Self> {
50        // 1) Create a brand new temp dir
51        let new_tempdir = TempDir::new()?;
52
53        // 2) Build the analogous directory structure under the new temp dir
54        let new_product_root = new_tempdir.path();
55        let new_workdir      = new_product_root.join("workdir");
56        let new_logdir       = new_product_root.join("logs");
57        let new_done_dir     = new_product_root.join("done");
58        let new_failed_items = new_product_root.join("failed-items");
59        let new_target_dir   = new_product_root.join("target");
60        let new_failed_json  = new_product_root.join("failed-json-repairs");
61
62        // We'll create these directories. Even if the old one didn't exist, we ensure
63        // there's a corresponding place in the new ephemeral workspace.
64        std::fs::create_dir_all(&new_workdir)?;
65        std::fs::create_dir_all(&new_logdir)?;
66        std::fs::create_dir_all(&new_done_dir)?;
67        std::fs::create_dir_all(&new_failed_items)?;
68        std::fs::create_dir_all(&new_target_dir)?;
69        std::fs::create_dir_all(&new_failed_json)?;
70
71        // 3) Copy existing content from the old workspace to the new one if it exists
72        //    This includes the main workdir plus the other "special" directories.
73        //    If you want to skip certain directories or files, handle that logic here.
74        //
75        // We'll do best-effort copying. For instance, if the user never created
76        // the old done_dir, it might not exist. We'll guard that with `if path.exists()`.
77        copy_dir_if_exists(&self.workdir(),      &new_workdir)?;
78        copy_dir_if_exists(&self.logdir(),       &new_logdir)?;
79        copy_dir_if_exists(&self.done_dir(),     &new_done_dir)?;
80        copy_dir_if_exists(&self.failed_items_dir(), &new_failed_items)?;
81        copy_dir_if_exists(&self.target_dir(),   &new_target_dir)?;
82        copy_dir_if_exists(&self.failed_json_repairs_dir(), &new_failed_json)?;
83
84        // 4) Construct the new `BatchWorkspace` referencing this fresh ephemeral environment
85        let mut new_ws = BatchWorkspaceBuilder::default()
86            .workdir(new_workdir)
87            .logdir(new_logdir)
88            .done_dir(new_done_dir)
89            .failed_items_dir(new_failed_items)
90            .target_dir(new_target_dir)
91            .failed_json_repairs_dir(new_failed_json)
92            // We'll default to `true` since we definitely have a new ephemeral environment now
93            .temporary(true)
94            .build()
95            .unwrap();
96
97        new_ws.set_temp_dir(Some(new_tempdir));
98
99        Ok(new_ws)
100    }
101}
102
103/// Copies all contents from `src` to `dst` if `src` exists and is a directory.
104/// If `src` does not exist, this is a no-op.
105fn copy_dir_if_exists(src: &Path, dst: &Path) -> io::Result<()> {
106    if !src.exists() || !src.is_dir() {
107        trace!("Source path {:?} does not exist or is not a directory; skipping copy.", src);
108        return Ok(());
109    }
110    copy_dir_recursively(src, dst)
111}
112
/// Recursively copies every file and subdirectory from `src` into `dst`.
///
/// `dst` (and any missing parents) is created if it does not already exist, so callers do not
/// need to prepare the destination beforehand. Existing files in `dst` with the same name are
/// overwritten by `std::fs::copy`.
///
/// Symlinks are not descended into: `DirEntry::file_type` does not follow symlinks, so a
/// symlink entry is passed to `std::fs::copy` like a regular file.
/// NOTE(review): a symlink that points at a directory is therefore expected to make
/// `std::fs::copy` fail rather than be copied — confirm whether that case matters here.
///
/// # Errors
///
/// Returns the first `io::Error` hit while reading `src`, creating a destination directory,
/// or copying a file. Entries already copied are left in place.
fn copy_dir_recursively(src: &Path, dst: &Path) -> io::Result<()> {
    // Open the source first so an unreadable `src` fails before the destination is touched.
    let entries = std::fs::read_dir(src)?;
    std::fs::create_dir_all(dst)?;

    for entry in entries {
        let entry = entry?;
        let src_path = entry.path();
        let dst_path = dst.join(entry.file_name());

        if entry.file_type()?.is_dir() {
            // The recursive call creates `dst_path` itself.
            copy_dir_recursively(&src_path, &dst_path)?;
        } else {
            std::fs::copy(&src_path, &dst_path)?;
        }
    }
    Ok(())
}
134
135/// EXHAUSTIVE TESTS
136#[cfg(test)]
137mod clone_as_fresh_temp_exhaustive_tests {
138    use super::*;
139    use tracing::*;
140    use tokio::fs;
141    use tokio::runtime::Runtime;
142
143    #[traced_test]
144    fn clone_as_fresh_temp_creates_completely_new_workspace() {
145        info!("Starting test: clone_as_fresh_temp_creates_completely_new_workspace");
146
147        // 1) Create an original workspace (ephemeral or not). We'll do ephemeral for simplicity.
148        let rt = Runtime::new().unwrap();
149        let original = rt.block_on(async {
150            let w = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
151            // place some files in the subdirectories
152            fs::write(w.workdir().join("file_in_workdir.txt"), b"some data")
153                .await
154                .expect("Failed to write test file in workdir");
155            fs::write(w.logdir().join("file_in_logs.txt"), b"logs data")
156                .await
157                .expect("Failed to write test file in logs");
158            fs::write(w.done_dir().join("file_in_done.txt"), b"done data")
159                .await
160                .expect("Failed to write test file in done");
161            w
162        });
163
164        // 2) Perform the clone operation
165        let cloned = original.clone_as_fresh_temp().expect("Cloning must succeed");
166        debug!("Original => {:?}", original);
167        debug!("Cloned   => {:?}", cloned);
168
169        // 3) Confirm we have a brand new `temp_dir` in the clone
170        assert!(
171            cloned.temp_dir().is_some(),
172            "Cloned must have a new ephemeral directory"
173        );
174        // It's not the same path
175        assert_ne!(original.workdir(), cloned.workdir(), "Workdir must differ");
176
177        // 4) Confirm the new workspace has the same contents
178        let rt = Runtime::new().unwrap();
179        rt.block_on(async {
180            // We read files from the cloned workspace
181            let cloned_workdir_file = cloned.workdir().join("file_in_workdir.txt");
182            let data = fs::read(&cloned_workdir_file).await.expect("File must exist in clone");
183            pretty_assert_eq!(data, b"some data");
184
185            let cloned_logs_file = cloned.logdir().join("file_in_logs.txt");
186            let data = fs::read(&cloned_logs_file).await.expect("Logs file must exist in clone");
187            pretty_assert_eq!(data, b"logs data");
188
189            let cloned_done_file = cloned.done_dir().join("file_in_done.txt");
190            let data = fs::read(&cloned_done_file).await.expect("Done file must exist in clone");
191            pretty_assert_eq!(data, b"done data");
192        });
193
194        info!("Finished test: clone_as_fresh_temp_creates_completely_new_workspace");
195    }
196
197    #[traced_test]
198    fn clone_as_fresh_temp_is_independent_after_creation() {
199        info!("Starting test: clone_as_fresh_temp_is_independent_after_creation");
200
201        // We'll confirm that modifications to the original after cloning do not affect the clone, and vice versa.
202        let rt = Runtime::new().unwrap();
203        let original = rt.block_on(async {
204            let w = BatchWorkspace::new_temp().await.unwrap();
205            fs::write(w.workdir().join("shared.txt"), b"initial").await.unwrap();
206            w
207        });
208
209        let cloned = original.clone_as_fresh_temp().unwrap();
210
211        // Modify the original after the clone
212        rt.block_on(async {
213            fs::write(original.workdir().join("shared.txt"), b"updated original").await.unwrap();
214        });
215
216        // Confirm the cloned copy is still the old data
217        rt.block_on(async {
218            let data_clone = fs::read(cloned.workdir().join("shared.txt")).await.expect("File in clone must exist");
219            pretty_assert_eq!(data_clone, b"initial", "Clone must remain unchanged after original is updated");
220
221            // Meanwhile, updating the clone doesn't affect the original
222            fs::write(cloned.workdir().join("shared.txt"), b"updated clone").await.unwrap();
223            let data_orig = fs::read(original.workdir().join("shared.txt")).await.unwrap();
224            pretty_assert_eq!(data_orig, b"updated original");
225        });
226
227        info!("Finished test: clone_as_fresh_temp_is_independent_after_creation");
228    }
229
230    #[traced_test]
231    fn copy_skips_missing_directories_gracefully() {
232        info!("Starting test: copy_skips_missing_directories_gracefully");
233        // If the original never had logs/ or done/ directories, we just skip them
234        let rt = Runtime::new().unwrap();
235        let original = rt.block_on(async {
236            let w = BatchWorkspace::new_temp().await.unwrap();
237            // We'll only populate w.workdir, ignoring logs or done
238            fs::write(w.workdir().join("somefile.txt"), b"some content").await.unwrap();
239            w
240        });
241
242        let cloned = original.clone_as_fresh_temp().unwrap();
243
244        rt.block_on(async {
245            let data = fs::read(cloned.workdir().join("somefile.txt")).await.unwrap();
246            pretty_assert_eq!(data, b"some content");
247            // logs or done subdir in original was empty or missing => no copies needed => no panic
248        });
249
250        info!("Finished test: copy_skips_missing_directories_gracefully");
251    }
252
253    #[traced_test]
254    fn clone_as_fresh_temp_handles_large_data_lightly() {
255        info!("Starting test: clone_as_fresh_temp_handles_large_data_lightly");
256        // We won't actually create huge data in a unit test, but let's do a moderate size check
257        // to confirm we don't crash or hang. We'll do ~1MB.
258        let rt = Runtime::new().unwrap();
259        let original = rt.block_on(async {
260            let w = BatchWorkspace::new_temp().await.unwrap();
261            // Create ~1MB file
262            let data = vec![b'x'; 1024 * 1024];
263            fs::write(w.workdir().join("large_file.bin"), data)
264                .await
265                .unwrap();
266            w
267        });
268
269        let cloned = original.clone_as_fresh_temp().expect("Should handle moderate data");
270        rt.block_on(async {
271            let data = fs::read(cloned.workdir().join("large_file.bin")).await.unwrap();
272            pretty_assert_eq!(data.len(), 1024 * 1024);
273        });
274
275        info!("Finished test: clone_as_fresh_temp_handles_large_data_lightly");
276    }
277
278    #[traced_test]
279    async fn concurrency_test_for_clone_as_fresh_temp() {
280        info!("Starting test: concurrency_test_for_clone_as_fresh_temp");
281
282        // We'll set up an original ephemeral workspace
283        let original = BatchWorkspace::new_temp().await
284            .expect("Failed to create temp workspace");
285        fs::write(original.workdir().join("thread_test.txt"), b"threaded")
286            .await
287            .expect("Failed to write test file");
288
289        // We'll spawn multiple tasks that each do a clone, verifying results
290        let arc_original = Arc::new(original);
291        let mut tasks = Vec::new();
292        for i in 0..4 {
293            let w = arc_original.clone();
294            tasks.push(tokio::spawn(async move {
295                debug!("Task {} => performing clone_as_fresh_temp", i);
296                let c = w.clone_as_fresh_temp().expect("Should succeed");
297                let data = fs::read(c.workdir().join("thread_test.txt"))
298                    .await
299                    .expect("Must exist in copy");
300                pretty_assert_eq!(data, b"threaded");
301                debug!("Task {} => validated clone data OK", i);
302            }));
303        }
304
305        let results = futures::future::join_all(tasks).await;
306        for (i, res) in results.into_iter().enumerate() {
307            match res {
308                Ok(_) => debug!("Task {} => success", i),
309                Err(e) => panic!("Task {} => join error: {:?}", i, e),
310            }
311        }
312
313        info!("Finished test: concurrency_test_for_clone_as_fresh_temp");
314    }
315
316    #[traced_test]
317    async fn respects_when_original_has_no_tempdir() {
318        info!("Starting test: respects_when_original_has_no_tempdir");
319
320        // We'll manually create a non-ephemeral workspace:
321        let tmp = tempdir().expect("Failed to create normal directory outside ephemeral");
322        let product_root = tmp.path().join("my_product");
323        fs::create_dir_all(&product_root)
324            .await
325            .expect("Failed to create product_root on disk");
326
327        let original = BatchWorkspace::new_in(&product_root)
328            .await
329            .expect("Failed to create workspace in product_root");
330        assert!(original.temp_dir().is_none(), "We expect no temp_dir for new_in workspace");
331
332        // Now ensure the subdirectories exist fully, so that we can place files in workdir
333        original.create_directories_if_dne().await.expect("Failed to create subdirs");
334
335        // Write some files in the normal workspace
336        let file_path = original.workdir().join("normal_file.txt");
337        fs::write(&file_path, b"hello").await.expect("Failed to write file in original workspace");
338
339        // Perform the clone operation
340        let cloned = original.clone_as_fresh_temp().expect("Clone should succeed, creating ephemeral env");
341        debug!("Original => {:?}", original);
342        debug!("Cloned   => {:?}", cloned);
343
344        // The cloned must always have a new Some(temp_dir)
345        assert!(cloned.temp_dir().is_some(), "Cloned must have ephemeral environment");
346
347        // Confirm the data is copied
348        let data = fs::read(cloned.workdir().join("normal_file.txt"))
349            .await
350            .expect("Copied file must exist in the ephemeral clone");
351        pretty_assert_eq!(data, b"hello");
352
353        info!("Finished test: respects_when_original_has_no_tempdir");
354    }
355
356    #[traced_test]
357    async fn clone_as_fresh_temp_returns_io_error_when_failing_dir_creation() {
358        info!("Starting test: clone_as_fresh_temp_returns_io_error_when_failing_dir_creation");
359
360        // We'll create a workspace that references an "unreadable" subdirectory which DOES exist,
361        // so that the attempt to copy it fails with an I/O error.
362        let tmp = tempdir().expect("Failed to create base tempdir");
363        let temp_path = tmp.path().join("unreadable");
364        std::fs::create_dir_all(&temp_path).expect("Failed to create unreadable directory");
365
366        // Create the subdirectory "unreadable/workdir" so that it DOES exist
367        let old_workdir = temp_path.join("workdir");
368        std::fs::create_dir_all(&old_workdir).expect("Failed to create old workdir inside unreadable dir");
369
370        // Now remove all perms from `old_workdir` so reading it fails
371        #[cfg(unix)]
372        {
373            use std::os::unix::fs::PermissionsExt;
374            let mut perms = std::fs::metadata(&old_workdir).unwrap().permissions();
375            perms.set_mode(0o000);
376            std::fs::set_permissions(&old_workdir, perms).unwrap();
377        }
378
379        // Build a "workspace" referencing that path
380        let workspace = BatchWorkspaceBuilder::default()
381            .workdir(temp_path.join("workdir"))
382            .logdir(temp_path.join("logs"))
383            .done_dir(temp_path.join("done"))
384            .failed_items_dir(temp_path.join("failed-items"))
385            .target_dir(temp_path.join("target"))
386            .failed_json_repairs_dir(temp_path.join("failed-json-repairs"))
387            .temporary(false)
388            .build()
389            .unwrap();
390
391        // Now attempt clone_as_fresh_temp(), expecting an IoError
392        let clone_res = workspace.clone_as_fresh_temp();
393        debug!("clone_as_fresh_temp => {:?}", clone_res);
394
395        assert!(
396            clone_res.is_err(),
397            "We forcibly removed perms => should fail with I/O error"
398        );
399
400        info!("Finished test: clone_as_fresh_temp_returns_io_error_when_failing_dir_creation");
401    }
402}