// diskann_benchmark_runner/checker.rs

/*
 * Copyright (c) Microsoft Corporation.
 * Licensed under the MIT license.
 */

use std::{
    collections::HashSet,
    path::{Path, PathBuf},
};

/// Shared context for resolving input and output file paths after deserialization.
#[derive(Debug)]
pub struct Checker {
    /// Root directories in which to look for input files, in priority order.
    ///
    /// Absolute input paths bypass this list entirely and are used as given.
    ///
    /// Relative input paths are joined onto each directory from first to last, and the
    /// first candidate that is an existing file wins.
    search_directories: Vec<PathBuf>,

    /// Root directory (only one permitted) against which relative output paths are
    /// resolved. It is also the output location used when no save path is supplied.
    output_directory: Option<PathBuf>,

    /// The collection of output directories registered so far with the checker.
    ///
    /// This ensures that each job uses a distinct output directory to avoid conflicts.
    current_outputs: HashSet<PathBuf>,
}

32impl Checker {
33    /// Create a new checker with the list of search directories..
34    pub(crate) fn new(search_directories: Vec<PathBuf>, output_directory: Option<PathBuf>) -> Self {
35        Self {
36            search_directories,
37            output_directory,
38            current_outputs: HashSet::new(),
39        }
40    }
41
42    /// Return the ordered list of search directories registered with the [`Checker`].
43    pub fn search_directories(&self) -> &[PathBuf] {
44        &self.search_directories
45    }
46
47    /// Return the output directory registered with the [`Checker`], if any.
48    pub fn output_directory(&self) -> Option<&PathBuf> {
49        self.output_directory.as_ref()
50    }
51
52    /// Register `save_path` as an output directory and resolve `save_path` to an absolute path.
53    ///
54    /// # NOTE
55    ///
56    /// The behavior of this function is expected to change in the near future.
57    pub fn register_output(&mut self, save_path: Option<&Path>) -> anyhow::Result<PathBuf> {
58        // Check if `save_path` is absolute or relative. If relative, resolve it to an absolute
59        // path using `self.output_directory.
60        let resolved_dir = match save_path {
61            None => {
62                if let Some(output_dir) = self.output_directory() {
63                    output_dir.clone()
64                } else {
65                    return Err(anyhow::Error::msg(
66                        "relative save path \"{}\" specified but no output directory was provided",
67                    ));
68                }
69            }
70            Some(save_path) => {
71                if save_path.is_absolute() {
72                    if !(save_path.is_dir()) {
73                        return Err(anyhow::Error::msg(format!(
74                            "absolute save path \"{}\" is not a valid directory",
75                            save_path.display()
76                        )));
77                    }
78                    save_path.to_path_buf()
79                } else {
80                    // relative path, we concatenate it with the output directory
81                    if let Some(output_dir) = self.output_directory() {
82                        let absolute = output_dir.join(save_path);
83                        if !absolute.is_dir() {
84                            return Err(anyhow::Error::msg(format!(
85                                "relative save path \"{}\" is not a valid directory when combined with output directory \"{}\"",
86                                save_path.display(),
87                                output_dir.display()
88                            )));
89                        }
90                        absolute
91                    } else {
92                        return Err(anyhow::Error::msg(format!(
93                            "relative save path \"{}\" specified but no output directory was provided",
94                            save_path.display()
95                        )));
96                    }
97                }
98            }
99        };
100
101        // If the resolved directory already exists - bail.
102        if !self.current_outputs.insert(resolved_dir.clone()) {
103            anyhow::bail!(
104                "output directory {} already being used by another job",
105                resolved_dir.display()
106            );
107        } else {
108            Ok(resolved_dir)
109        }
110    }
111
112    /// Try to resolve `path` using the following approach:
113    ///
114    /// 1. If `path` is absolute - check that it exists and is a valid file. If
115    ///    successful, return `path` unaltered.
116    ///
117    /// 2. If `path` is relative, work through `self.search_directories()` in order,
118    ///    returning the absolute path first existing file.
119    pub fn check_path(&self, path: &Path) -> Result<PathBuf, anyhow::Error> {
120        // Check if the file exists (allowing for relative paths with respect to the current
121        // directory.
122        //
123        // If the path is an absolute path and the file does not exist, then bail.
124        if path.is_absolute() {
125            if path.is_file() {
126                return Ok(path.into());
127            } else {
128                return Err(anyhow::Error::msg(format!(
129                    "input file with absolute path \"{}\" either does not exist or is not a file",
130                    path.display()
131                )));
132            }
133        };
134
135        // At this point, start searching in the provided directories.
136        for dir in self.search_directories() {
137            let absolute = dir.join(path);
138            if absolute.is_file() {
139                return Ok(absolute);
140            }
141        }
142        Err(anyhow::Error::msg(format!(
143            "could not find input file \"{}\" in the search directories \"{:?}\"",
144            path.display(),
145            self.search_directories(),
146        )))
147    }
148}

///////////
// Tests //
///////////

#[cfg(test)]
mod tests {
    use super::*;

    use std::fs::{create_dir, File};

    /// The constructor stores its arguments verbatim and the accessors return them.
    #[test]
    fn test_constructor() {
        let checker = Checker::new(Vec::new(), None);
        assert!(checker.search_directories().is_empty());
        assert!(checker.output_directory().is_none());

        let dir_a: PathBuf = "directory/a".into();
        let dir_b: PathBuf = "directory/another/b".into();

        let checker = Checker::new(vec![dir_a.clone()], Some(dir_b.clone()));
        assert_eq!(checker.search_directories(), vec![dir_a.clone()]);
        assert_eq!(checker.output_directory(), Some(&dir_b));

        let checker = Checker::new(vec![dir_a.clone(), dir_b.clone()], None);
        assert_eq!(
            checker.search_directories(),
            vec![dir_a.clone(), dir_b.clone()]
        );
        assert!(checker.output_directory().is_none());
    }

    /// Output registration: resolution of absent, absolute and relative save paths,
    /// validation of directories, and rejection of duplicates.
    #[test]
    fn test_register_output() {
        // We create a directory that looks like this:
        //
        // dir/
        //     out0/
        //     out1/
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();

        create_dir(root.join("out0")).unwrap();
        create_dir(root.join("out1")).unwrap();

        // Without a configured output directory.
        {
            let mut checker = Checker::new(Vec::new(), None);

            // No save path and nothing to fall back on.
            let err = checker.register_output(None).unwrap_err();
            assert!(err.to_string().contains("no output directory was provided"));

            // Relative save paths cannot be resolved either.
            let err = checker
                .register_output(Some(Path::new("out0")))
                .unwrap_err();
            let message = err.to_string();
            assert!(message.contains("relative save path"));
            assert!(message.contains("no output directory was provided"));

            // Absolute paths to existing directories are returned unmodified.
            let absolute = root.join("out0");
            assert_eq!(
                checker.register_output(Some(absolute.as_path())).unwrap(),
                absolute,
            );

            // Registering the same directory twice is an error.
            let err = checker
                .register_output(Some(absolute.as_path()))
                .unwrap_err();
            assert!(err.to_string().contains("already being used by another job"));

            // Absolute paths must point to an existing directory.
            let missing = root.join("missing");
            let err = checker
                .register_output(Some(missing.as_path()))
                .unwrap_err();
            let message = err.to_string();
            assert!(message.starts_with("absolute save path"));
            assert!(message.contains("is not a valid directory"));
        }

        // With a configured output directory.
        {
            let mut checker = Checker::new(Vec::new(), Some(root.to_path_buf()));

            // Relative paths are joined onto the output directory.
            assert_eq!(
                checker.register_output(Some(Path::new("out0"))).unwrap(),
                root.join("out0"),
            );

            // The absolute spelling of an already registered directory conflicts.
            let absolute = root.join("out0");
            let err = checker
                .register_output(Some(absolute.as_path()))
                .unwrap_err();
            assert!(err.to_string().contains("already being used by another job"));

            // A different directory is still available.
            assert_eq!(
                checker.register_output(Some(Path::new("out1"))).unwrap(),
                root.join("out1"),
            );

            // Relative paths must resolve to an existing directory.
            let err = checker
                .register_output(Some(Path::new("missing")))
                .unwrap_err();
            let message = err.to_string();
            assert!(message.starts_with("relative save path"));
            assert!(message.contains("is not a valid directory when combined with output directory"));

            // No save path: the output directory itself is registered ...
            assert_eq!(checker.register_output(None).unwrap(), root.to_path_buf());

            // ... and can only be registered once.
            let err = checker.register_output(None).unwrap_err();
            assert!(err.to_string().contains("already being used by another job"));
        }
    }

    /// Input resolution: absolute paths are validated as-is, relative paths are
    /// searched for in the registered directories in order.
    #[test]
    fn test_check_path() {
        // We create a directory that looks like this:
        //
        // dir/
        //     file_a.txt
        //     dir0/
        //        file_b.txt
        //     dir1/
        //        file_c.txt
        //        dir0/
        //           file_c.txt
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path();

        File::create(path.join("file_a.txt")).unwrap();

        create_dir(path.join("dir0")).unwrap();
        create_dir(path.join("dir1")).unwrap();
        create_dir(path.join("dir1/dir0")).unwrap();
        File::create(path.join("dir0/file_b.txt")).unwrap();
        File::create(path.join("dir1/file_c.txt")).unwrap();
        File::create(path.join("dir1/dir0/file_c.txt")).unwrap();

        let make_checker = |paths: &[PathBuf]| -> Checker { Checker::new(paths.to_vec(), None) };

        // Test absolute path success.
        {
            let checker = make_checker(&[]);
            let absolute = path.join("file_a.txt");
            assert_eq!(
                checker.check_path(&absolute).unwrap(),
                absolute,
                "absolute paths should be unmodified if they exist",
            );

            let absolute = path.join("dir0/file_b.txt");
            assert_eq!(
                checker.check_path(&absolute).unwrap(),
                absolute,
                "absolute paths should be unmodified if they exist",
            );
        }

        // Absolute path fail.
        {
            let checker = make_checker(&[]);
            let absolute = path.join("dir0/file_c.txt");
            let err = checker.check_path(&absolute).unwrap_err();
            let message = err.to_string();
            assert!(message.contains("input file with absolute path"));
            assert!(message.contains("either does not exist or is not a file"));
        }

        // Directory search
        {
            let checker =
                make_checker(&[path.join("dir1/dir0"), path.join("dir1"), path.join("dir0")]);

            // Directories are searched in order.
            let file = Path::new("file_c.txt");
            let resolved = checker.check_path(file).unwrap();
            assert_eq!(resolved, path.join("dir1/dir0/file_c.txt"));

            let file = Path::new("file_b.txt");
            let resolved = checker.check_path(file).unwrap();
            assert_eq!(resolved, path.join("dir0/file_b.txt"));

            // Directory search can fail.
            let file = Path::new("file_a.txt");
            let err = checker.check_path(file).unwrap_err();
            let message = err.to_string();
            assert!(message.contains("could not find input file"));
            assert!(message.contains("in the search directories"));

            // If we give an absolute path, no directory search is performed.
            let file = path.join("file_c.txt");
            let err = checker.check_path(&file).unwrap_err();
            let message = err.to_string();
            assert!(message.starts_with("input file with absolute path"));
        }
    }
}
264}