Skip to main content

diskann_benchmark_runner/
checker.rs

1/*
2 * Copyright (c) Microsoft Corporation.
3 * Licensed under the MIT license.
4 */
5
6use std::{
7    collections::HashSet,
8    path::{Path, PathBuf},
9};
10
11use crate::Any;
12
/// Shared state used to resolve input and output file paths after deserialization.
#[derive(Debug)]
pub struct Checker {
    /// Ordered list of root directories consulted when resolving input files.
    ///
    /// An absolute input path is used directly and these directories are not consulted.
    ///
    /// A relative input path is joined onto each directory in turn, from first to last.
    search_directories: Vec<PathBuf>,

    /// The single root directory (if any) that output files are written into and
    /// that relative output paths are resolved against.
    output_directory: Option<PathBuf>,

    /// Every output directory that has been registered with the checker so far.
    ///
    /// Tracking these lets the checker reject a second job that tries to reuse a
    /// directory, so each job writes to a distinct location.
    current_outputs: HashSet<PathBuf>,

    /// Crate-private storage for the current input deserialization tag, which is
    /// read when creating new `Any` objects.
    ///
    /// Ensure that the correct tag is present before invoking [`Input::try_deserialize`].
    tag: Option<&'static str>,
}
39
40impl Checker {
41    /// Create a new checker with the list of search directories..
42    pub(crate) fn new(search_directories: Vec<PathBuf>, output_directory: Option<PathBuf>) -> Self {
43        Self {
44            search_directories,
45            output_directory,
46            current_outputs: HashSet::new(),
47            tag: None,
48        }
49    }
50
51    /// Invoke [`CheckDeserialization`] on `value` and if successful, package it in [`Any`].
52    pub fn any<T>(&mut self, mut value: T) -> anyhow::Result<Any>
53    where
54        T: serde::Serialize + CheckDeserialization + std::fmt::Debug + 'static,
55    {
56        value.check_deserialization(self)?;
57        #[expect(
58            clippy::expect_used,
59            reason = "crate infrastructure ensures an untagged Checker is not leaked"
60        )]
61        Ok(Any::new(value, self.tag.expect("tag must be set")))
62    }
63
64    /// Return the ordered list of search directories registered with the [`Checker`].
65    pub fn search_directories(&self) -> &[PathBuf] {
66        &self.search_directories
67    }
68
69    /// Return the output directory registered with the [`Checker`], if any.
70    pub fn output_directory(&self) -> Option<&PathBuf> {
71        self.output_directory.as_ref()
72    }
73
74    /// Register `save_path` as an output directory and resolve `save_path` to an absolute path.
75    ///
76    /// # NOTE
77    ///
78    /// The behavior of this function is expected to change in the near future.
79    pub fn register_output(&mut self, save_path: Option<&Path>) -> anyhow::Result<PathBuf> {
80        // Check if `save_path` is absolute or relative. If relative, resolve it to an absolute
81        // path using `self.output_directory.
82        let resolved_dir = match save_path {
83            None => {
84                if let Some(output_dir) = self.output_directory() {
85                    output_dir.clone()
86                } else {
87                    return Err(anyhow::Error::msg(
88                        "relative save path \"{}\" specified but no output directory was provided",
89                    ));
90                }
91            }
92            Some(save_path) => {
93                if save_path.is_absolute() {
94                    if !(save_path.is_dir()) {
95                        return Err(anyhow::Error::msg(format!(
96                            "absolute save path \"{}\" is not a valid directory",
97                            save_path.display()
98                        )));
99                    }
100                    save_path.to_path_buf()
101                } else {
102                    // relative path, we concatenate it with the output directory
103                    if let Some(output_dir) = self.output_directory() {
104                        let absolute = output_dir.join(save_path);
105                        if !absolute.is_dir() {
106                            return Err(anyhow::Error::msg(format!(
107                                "relative save path \"{}\" is not a valid directory when combined with output directory \"{}\"",
108                                save_path.display(),
109                                output_dir.display()
110                            )));
111                        }
112                        absolute
113                    } else {
114                        return Err(anyhow::Error::msg(format!(
115                            "relative save path \"{}\" specified but no output directory was provided",
116                            save_path.display()
117                        )));
118                    }
119                }
120            }
121        };
122
123        // If the resolved directory already exists - bail.
124        if !self.current_outputs.insert(resolved_dir.clone()) {
125            anyhow::bail!(
126                "output directory {} already being used by another job",
127                resolved_dir.display()
128            );
129        } else {
130            Ok(resolved_dir)
131        }
132    }
133
134    /// Try to resolve `path` using the following approach:
135    ///
136    /// 1. If `path` is absolute - check that it exists and is a valid file. If
137    ///    successful, return `path` unaltered.
138    ///
139    /// 2. If `path` is relative, work through `self.search_directories()` in order,
140    ///    returning the absolute path first existing file.
141    pub fn check_path(&self, path: &Path) -> Result<PathBuf, anyhow::Error> {
142        // Check if the file exists (allowing for relative paths with respect to the current
143        // directory.
144        //
145        // If the path is an absolute path and the file does not exist, then bail.
146        if path.is_absolute() {
147            if path.is_file() {
148                return Ok(path.into());
149            } else {
150                return Err(anyhow::Error::msg(format!(
151                    "input file with absolute path \"{}\" either does not exist or is not a file",
152                    path.display()
153                )));
154            }
155        };
156
157        // At this point, start searching in the provided directories.
158        for dir in self.search_directories() {
159            let absolute = dir.join(path);
160            if absolute.is_file() {
161                return Ok(absolute);
162            }
163        }
164        Err(anyhow::Error::msg(format!(
165            "could not find input file \"{}\" in the search directories \"{:?}\"",
166            path.display(),
167            self.search_directories(),
168        )))
169    }
170
171    pub(crate) fn set_tag(&mut self, tag: &'static str) {
172        let _ = self.tag.insert(tag);
173    }
174}
175
176/// Perform post-process resolution of input and output files paths.
177pub trait CheckDeserialization {
178    /// Perform any necessary resolution of file paths, returning an error if a problem is
179    /// discovered.
180    fn check_deserialization(&mut self, checker: &mut Checker) -> Result<(), anyhow::Error>;
181}
182
183///////////
184// Tests //
185///////////
186
#[cfg(test)]
mod tests {
    use super::*;

    use std::fs::{create_dir, File};

    /// The constructor stores the search directories and output directory verbatim.
    #[test]
    fn test_constructor() {
        let checker = Checker::new(Vec::new(), None);
        assert!(checker.search_directories().is_empty());
        assert!(checker.output_directory().is_none());

        let dir_a: PathBuf = "directory/a".into();
        let dir_b: PathBuf = "directory/another/b".into();

        let checker = Checker::new(vec![dir_a.clone()], Some(dir_b.clone()));
        assert_eq!(checker.search_directories(), vec![dir_a.clone()]);
        assert_eq!(checker.output_directory(), Some(&dir_b));

        let checker = Checker::new(vec![dir_a.clone(), dir_b.clone()], None);
        assert_eq!(
            checker.search_directories(),
            vec![dir_a.clone(), dir_b.clone()]
        );
        assert!(checker.output_directory().is_none());
    }

    /// Absolute paths are validated directly; relative paths are searched for in order.
    #[test]
    fn test_check_path() {
        // We create a directory that looks like this:
        //
        // dir/
        //     file_a.txt
        //     dir0/
        //        file_b.txt
        //     dir1/
        //        file_c.txt
        //        dir0/
        //           file_c.txt
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path();

        File::create(path.join("file_a.txt")).unwrap();
        // Sanity check the fixture instead of just printing the result.
        assert!(path.join("file_a.txt").is_file());

        create_dir(path.join("dir0")).unwrap();
        create_dir(path.join("dir1")).unwrap();
        create_dir(path.join("dir1/dir0")).unwrap();
        File::create(path.join("dir0/file_b.txt")).unwrap();
        File::create(path.join("dir1/file_c.txt")).unwrap();
        File::create(path.join("dir1/dir0/file_c.txt")).unwrap();

        let make_checker = |paths: &[PathBuf]| -> Checker { Checker::new(paths.to_vec(), None) };

        // Test absolute path success.
        {
            let checker = make_checker(&[]);
            let absolute = path.join("file_a.txt");
            assert_eq!(
                checker.check_path(&absolute).unwrap(),
                absolute,
                "absolute paths should be unmodified if they exist",
            );

            let absolute = path.join("dir0/file_b.txt");
            assert_eq!(
                checker.check_path(&absolute).unwrap(),
                absolute,
                "absolute paths should be unmodified if they exist",
            );
        }

        // Absolute path fail.
        {
            let checker = make_checker(&[]);
            let absolute = path.join("dir0/file_c.txt");
            let err = checker.check_path(&absolute).unwrap_err();
            let message = err.to_string();
            assert!(message.contains("input file with absolute path"));
            assert!(message.contains("either does not exist or is not a file"));
        }

        // Directory search
        {
            let checker =
                make_checker(&[path.join("dir1/dir0"), path.join("dir1"), path.join("dir0")]);

            // Directories are searched in order.
            let file = Path::new("file_c.txt");
            let resolved = checker.check_path(file).unwrap();
            assert_eq!(resolved, path.join("dir1/dir0/file_c.txt"));

            let file = Path::new("file_b.txt");
            let resolved = checker.check_path(file).unwrap();
            assert_eq!(resolved, path.join("dir0/file_b.txt"));

            // Directory search can fail.
            let file = Path::new("file_a.txt");
            let err = checker.check_path(file).unwrap_err();
            let message = err.to_string();
            assert!(message.contains("could not find input file"));
            assert!(message.contains("in the search directories"));

            // If we give an absolute path, no directory search is performed.
            let file = path.join("file_c.txt");
            let err = checker.check_path(&file).unwrap_err();
            let message = err.to_string();
            assert!(message.starts_with("input file with absolute path"));
        }
    }

    /// Output directories are resolved against the output directory and may only be
    /// registered once.
    #[test]
    fn test_register_output() {
        // We create a directory that looks like this:
        //
        // dir/
        //     out/
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path();
        create_dir(path.join("out")).unwrap();

        // Without an output directory.
        {
            let mut checker = Checker::new(Vec::new(), None);

            // Neither a missing nor a relative save path can be resolved.
            let err = checker.register_output(None).unwrap_err();
            assert!(err.to_string().contains("no output directory was provided"));

            let err = checker
                .register_output(Some(Path::new("out")))
                .unwrap_err();
            assert!(err.to_string().contains("no output directory was provided"));

            // Absolute paths must point to an existing directory.
            let missing = path.join("missing");
            let err = checker
                .register_output(Some(missing.as_path()))
                .unwrap_err();
            assert!(err.to_string().contains("is not a valid directory"));

            // Existing absolute directories are returned unaltered ...
            let absolute = path.join("out");
            assert_eq!(
                checker.register_output(Some(absolute.as_path())).unwrap(),
                absolute
            );

            // ... but can only be registered once.
            let err = checker
                .register_output(Some(absolute.as_path()))
                .unwrap_err();
            assert!(err.to_string().contains("already being used by another job"));
        }

        // With an output directory.
        {
            let mut checker = Checker::new(Vec::new(), Some(path.to_path_buf()));

            // No save path resolves to the output directory itself, exactly once.
            assert_eq!(checker.register_output(None).unwrap(), path.to_path_buf());
            let err = checker.register_output(None).unwrap_err();
            assert!(err.to_string().contains("already being used by another job"));

            // Relative paths must exist underneath the output directory.
            let err = checker
                .register_output(Some(Path::new("missing")))
                .unwrap_err();
            assert!(err
                .to_string()
                .contains("is not a valid directory when combined with output directory"));

            assert_eq!(
                checker.register_output(Some(Path::new("out"))).unwrap(),
                path.join("out")
            );

            // The same directory spelled as an absolute path conflicts with the
            // relative registration above.
            let absolute = path.join("out");
            let err = checker
                .register_output(Some(absolute.as_path()))
                .unwrap_err();
            assert!(err.to_string().contains("already being used by another job"));
        }
    }
}