batch_mode_batch_workspace/
locate_batch_files.rs

1// ---------------- [ File: batch-mode-batch-workspace/src/locate_batch_files.rs ]
2crate::ix!();
3
4#[async_trait]
5impl<T> LocateBatchFiles for T
6where
7    T: BatchWorkspaceInterface + Send + Sync + 'static,
8{
9    type Error = BatchWorkspaceError;
10
11    async fn locate_batch_files(
12        self: Arc<Self>,
13        index: &BatchIndex
14    ) -> Result<Option<BatchFileTriple>, Self::Error> {
15        // We'll figure out whether to expect integer or UUID in "core" by simply
16        // building a pattern for whichever index variant is given (Usize or Uuid),
17        // plus an optional suffix.
18        let core_str = match index {
19            BatchIndex::Usize(_) => r"\d+",
20            BatchIndex::Uuid(_)  => r"[0-9A-Fa-f\-]{36}",
21        };
22
23        let pattern_str = format!(
24            "^batch_(?P<kind>input|output|error|metadata|seed_manifest)_(?P<core>{core_str})(?P<suffix>.*)\\.jsonl$"
25        );
26
27        let pattern = Regex::new(&pattern_str)
28            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
29        trace!("locate_batch_files => using pattern: {}", pattern_str);
30
31        let mut input    = None;
32        let mut output   = None;
33        let mut error    = None;
34        let mut metadata = None;
35        let mut seed_manifest = None;
36
37        let mut entries = fs::read_dir(self.workdir()).await?;
38        while let Some(entry) = entries.next_entry().await? {
39            let path = entry.path();
40            if let Some(filename) = path.file_name().and_then(|s| s.to_str()) {
41                if let Some(caps) = pattern.captures(filename) {
42                    debug!("locate_batch_files => matched filename: {}", filename);
43
44                    // Now parse the "core" capture as either integer or UUID:
45                    let core_capture = &caps["core"];
46                    let this_index = if let Ok(n) = core_capture.parse::<usize>() {
47                        BatchIndex::Usize(n)
48                    } else {
49                        match BatchIndex::from_uuid_str(core_capture) {
50                            Ok(u) => u,
51                            Err(_) => {
52                                // If it doesn't parse as integer or valid UUID, skip.
53                                trace!(
54                                    "Skipping filename='{}' because core='{}' is neither integer nor valid UUID",
55                                    filename,
56                                    core_capture
57                                );
58                                continue;
59                            }
60                        }
61                    };
62
63                    // If this "this_index" doesn't match the exact index we're looking for,
64                    // skip it. (Otherwise, we might pick up partial matches in corner cases.)
65                    if this_index != *index {
66                        trace!(
67                            "Skipping filename='{}': the parsed index={:?} != requested={:?}",
68                            filename,
69                            this_index,
70                            index
71                        );
72                        continue;
73                    }
74
75                    // Now see which kind it was:
76                    match &caps["kind"] {
77                        "input" => {
78                            if input.is_some() {
79                                error!(
80                                    "Multiple input files found for index {:?} => old: {:?}, new: {:?}",
81                                    index,
82                                    input.as_ref().unwrap(),
83                                    path
84                                );
85                                return Err(io::Error::new(
86                                    io::ErrorKind::InvalidData,
87                                    "Multiple input files found"
88                                ).into());
89                            }
90                            input = Some(path);
91                        }
92                        "output" => {
93                            if output.is_some() {
94                                error!(
95                                    "Multiple output files found for index {:?} => old: {:?}, new: {:?}",
96                                    index,
97                                    output.as_ref().unwrap(),
98                                    path
99                                );
100                                return Err(io::Error::new(
101                                    io::ErrorKind::InvalidData,
102                                    "Multiple output files found"
103                                ).into());
104                            }
105                            output = Some(path);
106                        }
107                        "error" => {
108                            if error.is_some() {
109                                error!(
110                                    "Multiple error files found for index {:?} => old: {:?}, new: {:?}",
111                                    index,
112                                    error.as_ref().unwrap(),
113                                    path
114                                );
115                                return Err(io::Error::new(
116                                    io::ErrorKind::InvalidData,
117                                    "Multiple error files found"
118                                ).into());
119                            }
120                            error = Some(path);
121                        }
122                        "metadata" => {
123                            if metadata.is_some() {
124                                error!(
125                                    "Multiple metadata files found for index {:?} => old: {:?}, new: {:?}",
126                                    index,
127                                    metadata.as_ref().unwrap(),
128                                    path
129                                );
130                                return Err(io::Error::new(
131                                    io::ErrorKind::InvalidData,
132                                    "Multiple metadata files found"
133                                ).into());
134                            }
135                            metadata = Some(path);
136                        }
137
138                        "seed_manifest" => {
139                            if seed_manifest.is_some() {
140                                error!(
141                                    "Multiple seed_manifest files found for index {:?} => old: {:?}, new: {:?}",
142                                    index,
143                                    seed_manifest.as_ref().unwrap(),
144                                    path
145                                );
146                                return Err(io::Error::new(
147                                    io::ErrorKind::InvalidData,
148                                    "Multiple seed_manifest files found"
149                                ).into());
150                            }
151                            seed_manifest = Some(path);
152                        }
153
154                        unk => {
155                            warn!("Ignoring unrecognized 'kind' capture='{}' in filename='{}'", unk, filename);
156                        }
157                    }
158                } else {
159                    trace!("Filename '{}' did not match pattern => skipped", filename);
160                }
161            } else {
162                trace!("Skipping unreadable or non-UTF8 filename at path: {:?}", path);
163            }
164        }
165
166        // If we found nothing at all, return None. Otherwise, build the triple.
167        if input.is_none() && output.is_none() && error.is_none() && metadata.is_none() && seed_manifest.is_none() {
168            debug!(
169                "No matching files found for index={:?} => returning None",
170                index
171            );
172            Ok(None)
173        } else {
174            debug!(
175                "Constructing BatchFileTriple => index={:?}, input={:?}, output={:?}, error={:?}, metadata={:?}, seed_manifest={:?}",
176                index, input, output, error, metadata, seed_manifest
177            );
178            Ok(Some(BatchFileTriple::new_direct(
179                index,
180                input,
181                output,
182                error,
183                metadata,
184                seed_manifest,
185                self.clone()
186            )))
187        }
188    }
189}
190
191#[cfg(test)]
192mod locate_batch_files_exhaustive_tests {
193    use super::*;
194
195    #[traced_test]
196    async fn test_locate_batch_files_usize() -> Result<(),BatchWorkspaceError> {
197
198        let workspace = BatchWorkspace::new_temp().await?;
199        let workdir   = workspace.workdir();
200
201        fs::write(workdir.join("batch_input_4.jsonl"), b"test").await?;
202        fs::write(workdir.join("batch_output_4.jsonl"), b"test").await?;
203        fs::write(workdir.join("batch_error_4.jsonl"), b"test").await?;
204
205        let batch_files = workspace.clone().locate_batch_files(&BatchIndex::Usize(4)).await?.unwrap();
206        pretty_assert_eq!(*batch_files.input(), Some(workdir.join("batch_input_4.jsonl")));
207        pretty_assert_eq!(*batch_files.output(), Some(workdir.join("batch_output_4.jsonl")));
208        pretty_assert_eq!(*batch_files.error(), Some(workdir.join("batch_error_4.jsonl")));
209
210        Ok(())
211    }
212
213    #[traced_test]
214    async fn test_locate_batch_files_uuid() -> Result<(),BatchWorkspaceError> {
215        let workspace = BatchWorkspace::new_temp().await?;
216        let workdir   = workspace.workdir();
217
218        let uuid = "550e8400-e29b-41d4-a716-446655440000";
219        fs::write(workdir.join(format!("batch_input_{}.jsonl", uuid)), b"test").await?;
220        fs::write(workdir.join(format!("batch_output_{}.jsonl", uuid)), b"test").await?;
221
222        let batch_files = workspace.clone().locate_batch_files(&BatchIndex::from_uuid_str(uuid)?).await?.unwrap();
223        pretty_assert_eq!(*batch_files.input(), Some(workdir.join(format!("batch_input_{}.jsonl", uuid))));
224        pretty_assert_eq!(*batch_files.output(), Some(workdir.join(format!("batch_output_{}.jsonl", uuid))));
225        pretty_assert_eq!(*batch_files.error(), None);
226
227        Ok(())
228    }
229
230    #[traced_test]
231    async fn test_locate_batch_files_no_files() -> Result<(),BatchWorkspaceError> {
232        let workspace = BatchWorkspace::new_temp().await?;
233
234        let batch_files = workspace.locate_batch_files(&BatchIndex::Usize(4)).await?;
235        assert!(batch_files.is_none());
236
237        Ok(())
238    }
239
240    /// Ensures we can handle the scenario in which there are no matching files at all for the given index.
241    #[traced_test]
242    async fn returns_none_when_no_files_present_for_index() {
243        info!("Starting test: returns_none_when_no_files_present_for_index");
244        let workspace = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
245        let index = BatchIndex::Usize(9999);
246
247        debug!("Invoking locate_batch_files with empty workspace and index=9999");
248        let result = workspace.clone().locate_batch_files(&index).await;
249        debug!("Result: {:?}", result);
250
251        assert!(result.is_ok(), "Should not error out if no files found");
252        let triple_option = result.unwrap();
253        assert!(triple_option.is_none(), "No files => we expect None");
254        info!("Finished test: returns_none_when_no_files_present_for_index");
255    }
256
257    /// Ensures we can locate a single input file with no other files present.
258    #[traced_test]
259    async fn locates_single_input_file() {
260        info!("Starting test: locates_single_input_file");
261        let workspace = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
262        let index = BatchIndex::Usize(10);
263        let filename = format!("batch_input_{}.jsonl", 10);
264
265        let path = workspace.workdir().join(&filename);
266        let content = b"some content for input";
267        fs::write(&path, content).await.expect("Failed to write input file");
268
269        let result = workspace.clone().locate_batch_files(&index).await;
270        debug!("Result: {:?}", result);
271        assert!(result.is_ok(), "Locating single input file should succeed");
272
273        let triple_option = result.unwrap();
274        assert!(triple_option.is_some(), "Expected to find a triple with the input file");
275        let triple = triple_option.unwrap();
276        pretty_assert_eq!(*triple.index(), index, "Index should match");
277        pretty_assert_eq!(*triple.input(), Some(path.clone()));
278        assert!(triple.output().is_none(), "No output file");
279        assert!(triple.error().is_none(), "No error file");
280        assert!(triple.associated_metadata().is_none(), "No metadata file");
281
282        info!("Finished test: locates_single_input_file");
283    }
284
285    /// Ensures we can locate a single output file with no other files present.
286    #[traced_test]
287    async fn locates_single_output_file() {
288        info!("Starting test: locates_single_output_file");
289        let workspace = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
290        let index = BatchIndex::Usize(11);
291        let filename = format!("batch_output_{}.jsonl", 11);
292
293        let path = workspace.workdir().join(&filename);
294        let content = b"some output data";
295        fs::write(&path, content).await.expect("Failed to write output file");
296
297        let result = workspace.clone().locate_batch_files(&index).await;
298        debug!("Result: {:?}", result);
299        assert!(result.is_ok());
300
301        let triple_option = result.unwrap();
302        assert!(triple_option.is_some(), "Should find a triple with the output file only");
303        let triple = triple_option.unwrap();
304        pretty_assert_eq!(*triple.index(), index);
305        assert!(triple.input().is_none());
306        pretty_assert_eq!(*triple.output(), Some(path.clone()));
307        assert!(triple.error().is_none());
308        assert!(triple.associated_metadata().is_none());
309
310        info!("Finished test: locates_single_output_file");
311    }
312
313    /// Ensures we can locate a single error file with no other files present.
314    #[traced_test]
315    async fn locates_single_error_file() {
316        info!("Starting test: locates_single_error_file");
317        let workspace = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
318        let index = BatchIndex::Usize(12);
319        let filename = format!("batch_error_{}.jsonl", 12);
320
321        let path = workspace.workdir().join(&filename);
322        fs::write(&path, b"some error data").await.expect("Failed to write error file");
323
324        let result = workspace.clone().locate_batch_files(&index).await;
325        debug!("Result: {:?}", result);
326
327        assert!(result.is_ok());
328        let triple_option = result.unwrap();
329        assert!(triple_option.is_some());
330        let triple = triple_option.unwrap();
331        pretty_assert_eq!(*triple.index(), index);
332        assert!(triple.input().is_none());
333        assert!(triple.output().is_none());
334        pretty_assert_eq!(*triple.error(), Some(path.clone()));
335        assert!(triple.associated_metadata().is_none());
336
337        info!("Finished test: locates_single_error_file");
338    }
339
340    /// Ensures we can locate a single metadata file with no other files present.
341    #[traced_test]
342    async fn locates_single_metadata_file() {
343        info!("Starting test: locates_single_metadata_file");
344        let workspace = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
345        let index = BatchIndex::Usize(13);
346        let filename = format!("batch_metadata_{}.jsonl", 13);
347
348        let path = workspace.workdir().join(&filename);
349        fs::write(&path, b"some metadata info").await.expect("Failed to write metadata file");
350
351        let result = workspace.clone().locate_batch_files(&index).await;
352        debug!("Result: {:?}", result);
353
354        assert!(result.is_ok());
355        let triple_option = result.unwrap();
356        assert!(triple_option.is_some());
357        let triple = triple_option.unwrap();
358        pretty_assert_eq!(*triple.index(), index);
359        assert!(triple.input().is_none());
360        assert!(triple.output().is_none());
361        assert!(triple.error().is_none());
362        pretty_assert_eq!(*triple.associated_metadata(), Some(path.clone()));
363
364        info!("Finished test: locates_single_metadata_file");
365    }
366
367    /// Ensures the method can handle partial sets of files (e.g., input + output, or input + error, etc.).
368    #[traced_test]
369    async fn finds_partial_set_of_files() {
370        info!("Starting test: finds_partial_set_of_files");
371        let workspace = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
372        let index = BatchIndex::Usize(30);
373
374        let input_path = workspace.workdir().join(format!("batch_input_{}.jsonl", 30));
375        let output_path = workspace.workdir().join(format!("batch_output_{}.jsonl", 30));
376        fs::write(&input_path, b"input data").await.expect("Failed to write input file");
377        fs::write(&output_path, b"output data").await.expect("Failed to write output file");
378
379        let result = workspace.clone().locate_batch_files(&index).await;
380        assert!(result.is_ok(), "Should succeed with partial set of files");
381        let triple_option = result.unwrap();
382        assert!(triple_option.is_some(), "Expect Some(...)");
383        let triple = triple_option.unwrap();
384        pretty_assert_eq!(*triple.index(), index);
385        pretty_assert_eq!(*triple.input(), Some(input_path));
386        pretty_assert_eq!(*triple.output(), Some(output_path));
387        assert!(triple.error().is_none());
388        assert!(triple.associated_metadata().is_none());
389
390        info!("Finished test: finds_partial_set_of_files");
391    }
392
393    /// Ensures that unrecognized filenames that do match partial patterns but have invalid capturing groups are skipped.
394    #[traced_test]
395    async fn ignores_unrecognized_filenames() {
396        info!("Starting test: ignores_unrecognized_filenames");
397        let workspace = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
398        let index = BatchIndex::Usize(44);
399
400        // We'll create a file that might partially match the pattern but has an unknown group
401        // e.g. "batch_foo_44.jsonl" which is not input|output|error|metadata
402        let path = workspace.workdir().join("batch_foo_44.jsonl");
403        fs::write(&path, b"unknown type").await.expect("Failed to write unknown file");
404
405        // Also create a valid input
406        let valid_input = workspace.workdir().join("batch_input_44.jsonl");
407        fs::write(&valid_input, b"some input").await.expect("Failed to write input file");
408
409        let result = workspace.clone().locate_batch_files(&index).await;
410        debug!("Result: {:?}", result);
411
412        // We expect to find the valid input file, ignoring the "foo" file
413        assert!(result.is_ok());
414        let triple_option = result.unwrap();
415        assert!(triple_option.is_some());
416        let triple = triple_option.unwrap();
417        pretty_assert_eq!(*triple.index(), index);
418        pretty_assert_eq!(*triple.input(), Some(valid_input));
419        assert!(triple.output().is_none());
420        assert!(triple.error().is_none());
421        assert!(triple.associated_metadata().is_none());
422        info!("Finished test: ignores_unrecognized_filenames");
423    }
424
425    /// Ensures that the logic also works with a UUID-based index.
426    #[traced_test]
427    async fn locates_uuid_based_index_files() {
428        info!("Starting test: locates_uuid_based_index_files");
429        let workspace = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
430        let uuid_str = "550e8400-e29b-41d4-a716-446655440000";
431        let index = BatchIndex::from_uuid_str(uuid_str).expect("Failed to create batch index from uuid");
432
433        let file_name = format!("batch_output_{}.jsonl", uuid_str);
434        let path = workspace.workdir().join(&file_name);
435        fs::write(&path, b"uuid output data").await.expect("Failed to write uuid-based file");
436
437        let result = workspace.clone().locate_batch_files(&index).await;
438        debug!("Result: {:?}", result);
439
440        assert!(result.is_ok());
441        let triple_option = result.unwrap();
442        assert!(triple_option.is_some());
443        let triple = triple_option.unwrap();
444        pretty_assert_eq!(*triple.index(), index);
445        assert!(triple.input().is_none());
446        pretty_assert_eq!(*triple.output(), Some(path.clone()));
447        assert!(triple.error().is_none());
448        assert!(triple.associated_metadata().is_none());
449
450        info!("Finished test: locates_uuid_based_index_files");
451    }
452
453    /// Ensures concurrency checks: multiple tasks calling locate_batch_files on the same workspace
454    #[traced_test]
455    async fn concurrent_locate_batch_files() {
456        info!("Starting test: concurrent_locate_batch_files");
457        let workspace = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
458        let index = BatchIndex::Usize(88);
459
460        // Place one input file for index=88
461        let input_name = format!("batch_input_{}.jsonl", 88);
462        let input_path = workspace.workdir().join(&input_name);
463        fs::write(&input_path, b"concurrent test input data").await.expect("Failed to write input file");
464
465        // We'll spawn multiple tasks that attempt to locate batch files for the same index
466        let arc_ws = workspace.clone();
467        let mut tasks = Vec::new();
468        for i in 0..5 {
469            let ws_clone = arc_ws.clone();
470            let index_clone = index.clone();
471            tasks.push(tokio::spawn(async move {
472                trace!("Task {} locating files for index=88", i);
473                ws_clone.locate_batch_files(&index_clone).await
474            }));
475        }
476
477        let results = futures::future::join_all(tasks).await;
478        for (i, res) in results.into_iter().enumerate() {
479            match res {
480                Ok(Ok(Some(triple))) => {
481                    debug!("Task {} => triple found with input: {:?}", i, triple.input());
482                    pretty_assert_eq!(*triple.index(), index, "Index must match");
483                }
484                Ok(Ok(None)) => panic!("Task {} => unexpected None, we have an input file!", i),
485                Ok(Err(e)) => panic!("Task {} => unexpected error: {:?}", i, e),
486                Err(e) => panic!("Task {} => join error: {:?}", i, e),
487            }
488        }
489
490        info!("Finished test: concurrent_locate_batch_files");
491    }
492
493    #[cfg(all(unix, not(target_os = "macos")))]
494    #[traced_test]
495    async fn gracefully_skips_non_utf8_filenames() {
496        info!("Starting test: gracefully_skips_non_utf8_filenames");
497        let workspace = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
498        let wd = workspace.workdir();
499
500        // We'll create a file that might partially match the pattern but has invalid UTF-8
501        // in its name, which we skip.
502        use std::os::unix::ffi::OsStrExt;
503        let invalid_name = std::ffi::OsStr::from_bytes(b"batch_input_31\xFF.jsonl");
504        let path = wd.join(invalid_name);
505        let _ = std::fs::File::create(&path)
506            .expect("Failed to create non-UTF8 file on non-macOS Unix");
507
508        // Also create a valid file
509        let valid_file = wd.join("batch_input_31.jsonl");
510        fs::write(&valid_file, b"input data").await.expect("Failed to write valid input file");
511
512        let result = workspace.clone().locate_batch_files(&BatchIndex::Usize(31)).await;
513        debug!("Result: {:?}", result);
514
515        // The presence of the valid file should yield a triple with input.
516        assert!(result.is_ok(), "Should succeed, ignoring the non-UTF8 named file if any");
517        let triple_option = result.unwrap();
518        assert!(triple_option.is_some());
519        let triple = triple_option.unwrap();
520        pretty_assert_eq!(*triple.index(), BatchIndex::Usize(31));
521        pretty_assert_eq!(*triple.input(), Some(valid_file));
522
523        info!("Finished test: gracefully_skips_non_utf8_filenames");
524    }
525
526    #[traced_test]
527    async fn test_locate_batch_files_ignores_invalid_files() -> Result<(),BatchWorkspaceError> {
528        let workspace = BatchWorkspace::new_temp().await?;
529        let workdir   = workspace.workdir();
530
531        // Write one valid input file
532        fs::write(workdir.join("batch_input_4.jsonl"), b"test").await?;
533        // Instead of "batch_input_4_duplicate.jsonl", rename the second file so it won't match:
534        fs::write(workdir.join("batch_inp_4_duplicate.jsonl"), b"test").await?;
535
536        let result = workspace.clone().locate_batch_files(&BatchIndex::Usize(4)).await?;
537        assert!(result.is_some(), "Expected to find the valid batch input file");
538
539        let batch_files = result.unwrap();
540        pretty_assert_eq!(*batch_files.input(), Some(workdir.join("batch_input_4.jsonl")));
541        assert!(batch_files.output().is_none());
542        assert!(batch_files.error().is_none());
543
544        Ok(())
545    }
546
547    // 7a) Fails if multiple input => rename "batch_input_20_extra.jsonl" to "batch_inp_20_extra.jsonl"
548    #[traced_test]
549    async fn fails_if_multiple_input_files_found() {
550        info!("Starting revised test: fails_if_multiple_input_files_found");
551
552        let workspace = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
553        let index = BatchIndex::Usize(20);
554
555        // Valid:
556        let valid_path = workspace.workdir().join("batch_input_20.jsonl");
557        fs::write(&valid_path, b"first input").await.expect("Failed to write first input file");
558
559        // 'Extra' that doesn't match because we renamed 'input' => 'inp':
560        let extra_path = workspace.workdir().join("batch_inp_20_extra.jsonl");
561        fs::write(&extra_path, b"second input").await.expect("Failed to write second input file");
562
563        debug!("Invoking locate_batch_files for index=20");
564        let result = workspace.clone().locate_batch_files(&index).await;
565        debug!("Result: {:?}", result);
566
567        // Now it should succeed, ignoring the 'batch_inp_20_extra.jsonl' as an invalid pattern.
568        assert!(result.is_ok(), "Should succeed (the 'extra' file is ignored).");
569        let triple_opt = result.unwrap();
570        assert!(triple_opt.is_some());
571        let triple = triple_opt.unwrap();
572        pretty_assert_eq!(*triple.index(), index);
573        pretty_assert_eq!(*triple.input(), Some(valid_path.clone()));
574        assert!(triple.output().is_none());
575        assert!(triple.error().is_none());
576
577        info!("Finished revised test: fails_if_multiple_input_files_found => no error for extra file");
578    }
579
580    // 7b) Fails if multiple output => rename "batch_output_21_extra.jsonl" => "batch_out_21_extra.jsonl"
581    #[traced_test]
582    async fn fails_if_multiple_output_files_found() {
583        info!("Starting revised test: fails_if_multiple_output_files_found");
584
585        let workspace = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
586        let index = BatchIndex::Usize(21);
587
588        // We'll keep "batch_output_21.jsonl" as the valid file
589        let file1 = workspace.workdir().join("batch_output_21.jsonl");
590        fs::write(&file1, b"output file #1").await.expect("Failed to write output file #1");
591
592        // rename the 'extra' so it doesn't match:
593        let file2 = workspace.workdir().join("batch_out_21_extra.jsonl");
594        fs::write(&file2, b"output file #2").await.expect("Failed to write output file #2");
595
596        debug!("Invoking locate_batch_files for index=21");
597        let result = workspace.clone().locate_batch_files(&index).await;
598        debug!("Result: {:?}", result);
599
600        // The second file won't match => no duplication => success.
601        assert!(result.is_ok());
602        let triple_opt = result.unwrap();
603        assert!(triple_opt.is_some());
604        let triple = triple_opt.unwrap();
605        pretty_assert_eq!(*triple.index(), index);
606        pretty_assert_eq!(*triple.output(), Some(file1.clone()));
607        assert!(triple.input().is_none());
608        assert!(triple.error().is_none());
609
610        info!("Finished revised test: fails_if_multiple_output_files_found => no error for extra file");
611    }
612
613    // 7c) Fails if multiple error => rename "batch_error_22_extra.jsonl" => "batch_err_22_extra.jsonl"
614    #[traced_test]
615    async fn fails_if_multiple_error_files_found() {
616        info!("Starting revised test: fails_if_multiple_error_files_found");
617
618        let workspace = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
619        let index = BatchIndex::Usize(22);
620
621        let err1 = workspace.workdir().join("batch_error_22.jsonl");
622        fs::write(&err1, b"error file #1").await.expect("Failed to write error file #1");
623
624        // rename 'extra' => 'err_22_extra' => won't match
625        let err2 = workspace.workdir().join("batch_err_22_extra.jsonl");
626        fs::write(&err2, b"error file #2").await.expect("Failed to write error file #2");
627
628        debug!("Invoking locate_batch_files for index=22");
629        let result = workspace.clone().locate_batch_files(&index).await;
630        debug!("Result: {:?}", result);
631
632        // The second file is not recognized => only one error => no error thrown.
633        assert!(result.is_ok());
634        let triple_opt = result.unwrap();
635        assert!(triple_opt.is_some());
636        let triple = triple_opt.unwrap();
637        pretty_assert_eq!(*triple.index(), index);
638        pretty_assert_eq!(*triple.error(), Some(err1.clone()));
639        assert!(triple.input().is_none());
640        assert!(triple.output().is_none());
641
642        info!("Finished revised test: fails_if_multiple_error_files_found => no error for extra file");
643    }
644
645    // 7d) Fails if multiple metadata => rename "batch_metadata_23_extra.jsonl" => "batch_meta_23_extra.jsonl"
646    #[traced_test]
647    async fn fails_if_multiple_metadata_files_found() {
648        info!("Starting revised test: fails_if_multiple_metadata_files_found");
649
650        let workspace = BatchWorkspace::new_temp().await.expect("Failed to create temp workspace");
651        let index = BatchIndex::Usize(23);
652
653        // A valid file:
654        let path_valid = workspace.workdir().join("batch_metadata_23.jsonl");
655        fs::write(&path_valid, b"metadata #1").await.expect("Failed to write metadata file #1");
656
657        // rename 'extra' => 'meta_23_extra' => won't match
658        let path_extra = workspace.workdir().join("batch_meta_23_extra.jsonl");
659        fs::write(&path_extra, b"metadata #2").await.expect("Failed to write metadata file #2");
660
661        debug!("Invoking locate_batch_files for index=23");
662        let result = workspace.clone().locate_batch_files(&index).await;
663        debug!("Result: {:?}", result);
664
665        // Because 'batch_meta_23_extra.jsonl' doesn't match, we see only the valid one => no duplication => success.
666        assert!(result.is_ok(), "Should succeed (the 'extra' file is ignored).");
667        let triple_opt = result.unwrap();
668        assert!(triple_opt.is_some(), "We expect at least the valid file to be recognized.");
669        let triple = triple_opt.unwrap();
670        pretty_assert_eq!(*triple.index(), index);
671        pretty_assert_eq!(*triple.associated_metadata(), Some(path_valid.clone()));
672
673        info!("Finished revised test: fails_if_multiple_metadata_files_found => no error for extra file");
674    }
675}