Skip to main content

diskann_benchmark_runner/
checker.rs

1/*
2 * Copyright (c) Microsoft Corporation.
3 * Licensed under the MIT license.
4 */
5
6use std::{
7    collections::HashSet,
8    path::{Path, PathBuf},
9};
10
11use crate::Any;
12
/// Shared state used to resolve input and output file paths after deserialization.
#[derive(Debug)]
pub struct Checker {
    /// Ordered root directories that are searched for input files.
    ///
    /// An input path that is absolute is used as-is and these directories are not
    /// consulted. A relative input path is tried against each directory in turn,
    /// from the first entry to the last.
    search_directories: Vec<PathBuf>,

    /// The single root directory (at most one) that output files are written into
    /// and in which output files are looked up.
    output_directory: Option<PathBuf>,

    /// Every output directory that has been registered with the checker so far.
    ///
    /// Tracking these lets the checker reject a second job that asks for a directory
    /// already in use, so that jobs do not conflict with one another.
    current_outputs: HashSet<PathBuf>,

    /// Crate-private storage for the current input deserialization tag, which is
    /// read whenever a new `Any` object is created.
    ///
    /// Ensure that the correct tag is present before invoking [`Input::try_deserialize`].
    tag: Option<&'static str>,
}
39
40impl Checker {
41    /// Create a new checker with the list of search directories..
42    pub(crate) fn new(search_directories: Vec<PathBuf>, output_directory: Option<PathBuf>) -> Self {
43        Self {
44            search_directories,
45            output_directory,
46            current_outputs: HashSet::new(),
47            tag: None,
48        }
49    }
50
51    /// Invoke [`CheckDeserialization`] on `value` and if successful, package it in [`Any`].
52    pub fn any<T>(&mut self, mut value: T) -> anyhow::Result<Any>
53    where
54        T: serde::Serialize + CheckDeserialization + std::fmt::Debug + 'static,
55    {
56        value.check_deserialization(self)?;
57        Ok(Any::new(value, self.tag.expect("tag must be set")))
58    }
59
60    /// Return the ordered list of search directories registered with the [`Checker`].
61    pub fn search_directories(&self) -> &[PathBuf] {
62        &self.search_directories
63    }
64
65    /// Return the output directory registered with the [`Checker`], if any.
66    pub fn output_directory(&self) -> Option<&PathBuf> {
67        self.output_directory.as_ref()
68    }
69
70    /// Register `save_path` as an output directory and resolve `save_path` to an absolute path.
71    ///
72    /// # NOTE
73    ///
74    /// The behavior of this function is expected to change in the near future.
75    pub fn register_output(&mut self, save_path: Option<&Path>) -> anyhow::Result<PathBuf> {
76        // Check if `save_path` is absolute or relative. If relative, resolve it to an absolute
77        // path using `self.output_directory.
78        let resolved_dir = match save_path {
79            None => {
80                if let Some(output_dir) = self.output_directory() {
81                    output_dir.clone()
82                } else {
83                    return Err(anyhow::Error::msg(
84                        "relative save path \"{}\" specified but no output directory was provided",
85                    ));
86                }
87            }
88            Some(save_path) => {
89                if save_path.is_absolute() {
90                    if !(save_path.is_dir()) {
91                        return Err(anyhow::Error::msg(format!(
92                            "absolute save path \"{}\" is not a valid directory",
93                            save_path.display()
94                        )));
95                    }
96                    save_path.to_path_buf()
97                } else {
98                    // relative path, we concatenate it with the output directory
99                    if let Some(output_dir) = self.output_directory() {
100                        let absolute = output_dir.join(save_path);
101                        if !absolute.is_dir() {
102                            return Err(anyhow::Error::msg(format!(
103                                "relative save path \"{}\" is not a valid directory when combined with output directory \"{}\"",
104                                save_path.display(),
105                                output_dir.display()
106                            )));
107                        }
108                        absolute
109                    } else {
110                        return Err(anyhow::Error::msg(format!(
111                            "relative save path \"{}\" specified but no output directory was provided",
112                            save_path.display()
113                        )));
114                    }
115                }
116            }
117        };
118
119        // If the resolved directory already exists - bail.
120        if !self.current_outputs.insert(resolved_dir.clone()) {
121            anyhow::bail!(
122                "output directory {} already being used by another job",
123                resolved_dir.display()
124            );
125        } else {
126            Ok(resolved_dir)
127        }
128    }
129
130    /// Try to resolve `path` using the following approach:
131    ///
132    /// 1. If `path` is absolute - check that it exists and is a valid file. If
133    ///    successful, return `path` unaltered.
134    ///
135    /// 2. If `path` is relative, work through `self.search_directories()` in order,
136    ///    returning the absolute path first existing file.
137    pub fn check_path(&self, path: &Path) -> Result<PathBuf, anyhow::Error> {
138        // Check if the file exists (allowing for relative paths with respect to the current
139        // directory.
140        //
141        // If the path is an absolute path and the file does not exist, then bail.
142        if path.is_absolute() {
143            if path.is_file() {
144                return Ok(path.into());
145            } else {
146                return Err(anyhow::Error::msg(format!(
147                    "input file with absolute path \"{}\" either does not exist or is not a file",
148                    path.display()
149                )));
150            }
151        };
152
153        // At this point, start searching in the provided directories.
154        for dir in self.search_directories() {
155            let absolute = dir.join(path);
156            if absolute.is_file() {
157                return Ok(absolute);
158            }
159        }
160        Err(anyhow::Error::msg(format!(
161            "could not find input file \"{}\" in the search directories \"{:?}\"",
162            path.display(),
163            self.search_directories(),
164        )))
165    }
166
167    pub(crate) fn set_tag(&mut self, tag: &'static str) {
168        let _ = self.tag.insert(tag);
169    }
170}
171
172/// Perform post-process resolution of input and output files paths.
173pub trait CheckDeserialization {
174    /// Perform any necessary resolution of file paths, returning an error if a problem is
175    /// discovered.
176    fn check_deserialization(&mut self, checker: &mut Checker) -> Result<(), anyhow::Error>;
177}
178
179///////////
180// Tests //
181///////////
182
#[cfg(test)]
mod tests {
    use super::*;

    use std::fs::{create_dir, File};

    /// The constructor stores the search directories and output directory verbatim.
    #[test]
    fn test_constructor() {
        let checker = Checker::new(Vec::new(), None);
        assert!(checker.search_directories().is_empty());
        assert!(checker.output_directory().is_none());

        let dir_a: PathBuf = "directory/a".into();
        let dir_b: PathBuf = "directory/another/b".into();

        let checker = Checker::new(vec![dir_a.clone()], Some(dir_b.clone()));
        assert_eq!(checker.search_directories(), vec![dir_a.clone()]);
        assert_eq!(checker.output_directory(), Some(&dir_b));

        let checker = Checker::new(vec![dir_a.clone(), dir_b.clone()], None);
        assert_eq!(
            checker.search_directories(),
            vec![dir_a.clone(), dir_b.clone()]
        );
        assert!(checker.output_directory().is_none());
    }

    /// Exercise absolute-path handling and ordered search-directory lookup in
    /// `Checker::check_path`.
    #[test]
    fn test_check_path() {
        // We create a directory that looks like this:
        //
        // dir/
        //     file_a.txt
        //     dir0/
        //        file_b.txt
        //     dir1/
        //        file_c.txt
        //        dir0/
        //           file_c.txt
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path();

        File::create(path.join("file_a.txt")).unwrap();
        // Sanity-check the fixture instead of printing it.
        assert!(path.join("file_a.txt").is_file());

        create_dir(path.join("dir0")).unwrap();
        create_dir(path.join("dir1")).unwrap();
        create_dir(path.join("dir1/dir0")).unwrap();
        File::create(path.join("dir0/file_b.txt")).unwrap();
        File::create(path.join("dir1/file_c.txt")).unwrap();
        File::create(path.join("dir1/dir0/file_c.txt")).unwrap();

        let make_checker = |paths: &[PathBuf]| -> Checker { Checker::new(paths.to_vec(), None) };

        // Test absolute path success.
        {
            let checker = make_checker(&[]);
            let absolute = path.join("file_a.txt");
            assert_eq!(
                checker.check_path(&absolute).unwrap(),
                absolute,
                "absolute paths should be unmodified if they exist",
            );

            let absolute = path.join("dir0/file_b.txt");
            assert_eq!(
                checker.check_path(&absolute).unwrap(),
                absolute,
                "absolute paths should be unmodified if they exist",
            );
        }

        // Absolute path fail: `dir0/file_c.txt` was never created.
        {
            let checker = make_checker(&[]);
            let absolute = path.join("dir0/file_c.txt");
            let err = checker.check_path(&absolute).unwrap_err();
            let message = err.to_string();
            assert!(message.contains("input file with absolute path"));
            assert!(message.contains("either does not exist or is not a file"));
        }

        // Directory search
        {
            let checker =
                make_checker(&[path.join("dir1/dir0"), path.join("dir1"), path.join("dir0")]);

            // Directories are searched in order, so `dir1/dir0` wins over `dir1`.
            let file = Path::new("file_c.txt");
            let resolved = checker.check_path(file).unwrap();
            assert_eq!(resolved, path.join("dir1/dir0/file_c.txt"));

            let file = Path::new("file_b.txt");
            let resolved = checker.check_path(file).unwrap();
            assert_eq!(resolved, path.join("dir0/file_b.txt"));

            // Directory search can fail: `file_a.txt` only exists in the root, which is
            // not one of the search directories.
            let file = Path::new("file_a.txt");
            let err = checker.check_path(file).unwrap_err();
            let message = err.to_string();
            assert!(message.contains("could not find input file"));
            assert!(message.contains("in the search directories"));

            // If we give an absolute path, no directory search is performed.
            let file = path.join("file_c.txt");
            let err = checker.check_path(&file).unwrap_err();
            let message = err.to_string();
            assert!(message.starts_with("input file with absolute path"));
        }
    }
}
293}