// rig_core/loaders/file.rs

1use std::{fs, path::PathBuf, string::FromUtf8Error};
2
3use glob::glob;
4use thiserror::Error;
5
6#[derive(Error, Debug)]
7pub enum FileLoaderError {
8    #[error("Invalid glob pattern: {0}")]
9    InvalidGlobPattern(String),
10
11    #[error("IO error: {0}")]
12    IoError(#[from] std::io::Error),
13
14    #[error("Pattern error: {0}")]
15    PatternError(#[from] glob::PatternError),
16
17    #[error("Glob error: {0}")]
18    GlobError(#[from] glob::GlobError),
19
20    #[error("String conversion error: {0}")]
21    StringUtf8Error(#[from] FromUtf8Error),
22}
23
// ================================================================
// The Readable trait and its implementations for reading file contents
// ================================================================
27pub(crate) trait Readable {
28    fn read(self) -> Result<String, FileLoaderError>;
29    fn read_with_path(self) -> Result<(PathBuf, String), FileLoaderError>;
30}
31
32impl<'a> FileLoader<'a, PathBuf> {
33    pub fn read(self) -> FileLoader<'a, Result<String, FileLoaderError>> {
34        FileLoader {
35            iterator: Box::new(self.iterator.map(|res| res.read())),
36        }
37    }
38    pub fn read_with_path(self) -> FileLoader<'a, Result<(PathBuf, String), FileLoaderError>> {
39        FileLoader {
40            iterator: Box::new(self.iterator.map(|res| res.read_with_path())),
41        }
42    }
43}
44
45impl Readable for PathBuf {
46    fn read(self) -> Result<String, FileLoaderError> {
47        fs::read_to_string(self).map_err(FileLoaderError::IoError)
48    }
49    fn read_with_path(self) -> Result<(PathBuf, String), FileLoaderError> {
50        let contents = fs::read_to_string(&self);
51        Ok((self, contents?))
52    }
53}
54
55impl Readable for Vec<u8> {
56    fn read(self) -> Result<String, FileLoaderError> {
57        Ok(String::from_utf8(self)?)
58    }
59
60    fn read_with_path(self) -> Result<(PathBuf, String), FileLoaderError> {
61        let res = String::from_utf8(self)?;
62
63        Ok((PathBuf::from("<memory>"), res))
64    }
65}
66
67impl<T: Readable> Readable for Result<T, FileLoaderError> {
68    fn read(self) -> Result<String, FileLoaderError> {
69        self.map(|t| t.read())?
70    }
71    fn read_with_path(self) -> Result<(PathBuf, String), FileLoaderError> {
72        self.map(|t| t.read_with_path())?
73    }
74}
75
// ================================================================
// FileLoader definition and implementations
// ================================================================
79
/// [FileLoader] loads files selected by a glob pattern or a directory path (or taken from
///  in-memory byte buffers) and exposes them as an iterator whose items can be read into strings.
///
/// # Errors
///
/// Failures are reported through [FileLoaderError], which covers invalid glob patterns, IO
///  errors, glob errors and invalid UTF-8 in byte input.
///
/// # Example Usage
///
/// ```no_run
/// use rig_core::loaders::FileLoader;
///
/// fn main() -> Result<(), Box<dyn std::error::Error>> {
///     // Create a FileLoader using a glob pattern
///     let loader = FileLoader::with_glob("path/to/files/*.txt")?;
///
///     // Read file contents, ignoring any errors
///     let contents: Vec<String> = loader
///         .read()
///         .ignore_errors()
///         .into_iter()
///         .collect();
///
///     for content in contents {
///         println!("{}", content);
///     }
///
///     Ok(())
/// }
/// ```
///
/// [FileLoader] is generic over its item type, so each step (paths, read results, unwrapped
///  values) is a distinct type and the compiler checks that only the methods valid for the
///  current step are called.
pub struct FileLoader<'a, T> {
    /// The boxed iterator that produces this loader's items.
    iterator: Box<dyn Iterator<Item = T> + 'a>,
}
118
119impl<'a> FileLoader<'a, Result<PathBuf, FileLoaderError>> {
120    /// Reads the contents of the files within the iterator returned by [FileLoader::with_glob] or
121    ///  [FileLoader::with_dir].
122    ///
123    /// # Example
124    /// Read files in directory "files/*.txt" and print the content for each file
125    ///
126    /// ```no_run
127    /// # use rig_core::loaders::FileLoader;
128    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
129    /// let content = FileLoader::with_glob("files/*.txt")?.read();
130    /// for result in content {
131    ///     match result {
132    ///         Ok(content) => println!("{}", content),
133    ///         Err(e) => eprintln!("Error reading file: {}", e),
134    ///     }
135    /// }
136    /// # Ok(())
137    /// # }
138    /// ```
139    pub fn read(self) -> FileLoader<'a, Result<String, FileLoaderError>> {
140        FileLoader {
141            iterator: Box::new(self.iterator.map(|res| res.read())),
142        }
143    }
144    /// Reads the contents of the files within the iterator returned by [FileLoader::with_glob] or
145    ///  [FileLoader::with_dir] and returns the path along with the content.
146    ///
147    /// # Example
148    /// Read files in directory "files/*.txt" and print the content for corresponding path for each
149    ///  file.
150    ///
151    /// ```no_run
152    /// # use rig_core::loaders::FileLoader;
153    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
154    /// let content = FileLoader::with_glob("files/*.txt")?.read_with_path();
155    /// for result in content {
156    ///     match result {
157    ///         Ok((path, content)) => println!("{:?} {}", path, content),
158    ///         Err(e) => eprintln!("Error reading file: {}", e),
159    ///     }
160    /// }
161    /// # Ok(())
162    /// # }
163    /// ```
164    pub fn read_with_path(self) -> FileLoader<'a, Result<(PathBuf, String), FileLoaderError>> {
165        FileLoader {
166            iterator: Box::new(self.iterator.map(|res| res.read_with_path())),
167        }
168    }
169}
170
171impl<'a, T> FileLoader<'a, Result<T, FileLoaderError>>
172where
173    T: 'a,
174{
175    /// Ignores errors in the iterator, returning only successful results. This can be used on any
176    ///  [FileLoader] state of iterator whose items are results.
177    ///
178    /// # Example
179    /// Read files in directory "files/*.txt" and ignore errors from unreadable files.
180    ///
181    /// ```no_run
182    /// # use rig_core::loaders::FileLoader;
183    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
184    /// let content = FileLoader::with_glob("files/*.txt")?.read().ignore_errors();
185    /// for content in content {
186    ///     println!("{}", content)
187    /// }
188    /// # Ok(())
189    /// # }
190    /// ```
191    pub fn ignore_errors(self) -> FileLoader<'a, T> {
192        FileLoader {
193            iterator: Box::new(self.iterator.filter_map(|res| res.ok())),
194        }
195    }
196}
197
198impl FileLoader<'_, Result<PathBuf, FileLoaderError>> {
199    /// Creates a new [FileLoader] using a glob pattern to match files.
200    ///
201    /// # Example
202    /// Create a [FileLoader] for all `.txt` files that match the glob "files/*.txt".
203    ///
204    /// ```no_run
205    /// # use rig_core::loaders::FileLoader;
206    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
207    /// let loader = FileLoader::with_glob("files/*.txt")?;
208    /// # Ok(())
209    /// # }
210    /// ```
211    pub fn with_glob(
212        pattern: &str,
213    ) -> Result<FileLoader<'_, Result<PathBuf, FileLoaderError>>, FileLoaderError> {
214        let paths = glob(pattern)?;
215        Ok(FileLoader {
216            iterator: Box::new(
217                paths
218                    .into_iter()
219                    .map(|path| path.map_err(FileLoaderError::GlobError)),
220            ),
221        })
222    }
223
224    /// Creates a new [FileLoader] on all files within a directory.
225    ///
226    /// # Example
227    /// Create a [FileLoader] for all files that are in the directory "files" (ignores subdirectories).
228    ///
229    /// ```no_run
230    /// # use rig_core::loaders::FileLoader;
231    /// # fn run() -> Result<(), Box<dyn std::error::Error>> {
232    /// let loader = FileLoader::with_dir("files")?;
233    /// # Ok(())
234    /// # }
235    /// ```
236    pub fn with_dir(
237        directory: &str,
238    ) -> Result<FileLoader<'_, Result<PathBuf, FileLoaderError>>, FileLoaderError> {
239        Ok(FileLoader {
240            iterator: Box::new(fs::read_dir(directory)?.filter_map(|entry| {
241                let path = entry.ok()?.path();
242                if path.is_file() { Some(Ok(path)) } else { None }
243            })),
244        })
245    }
246}
247
248impl<'a> FileLoader<'a, Vec<u8>> {
249    /// Ingest a  as a byte array.
250    pub fn from_bytes(bytes: Vec<u8>) -> FileLoader<'a, Vec<u8>> {
251        FileLoader {
252            iterator: Box::new(vec![bytes].into_iter()),
253        }
254    }
255
256    /// Ingest multiple byte arrays.
257    pub fn from_bytes_multi(bytes_vec: Vec<Vec<u8>>) -> FileLoader<'a, Vec<u8>> {
258        FileLoader {
259            iterator: Box::new(bytes_vec.into_iter()),
260        }
261    }
262
263    /// Use this once you've created the loader to load the document in.
264    pub fn read(self) -> FileLoader<'a, Result<String, FileLoaderError>> {
265        FileLoader {
266            iterator: Box::new(self.iterator.map(|res| res.read())),
267        }
268    }
269
270    /// Use this once you've created the reader to load the document in (and get the path).
271    pub fn read_with_path(self) -> FileLoader<'a, Result<(PathBuf, String), FileLoaderError>> {
272        FileLoader {
273            iterator: Box::new(self.iterator.map(|res| res.read_with_path())),
274        }
275    }
276}
277
// ================================================================
// Iterator support for FileLoader
// ================================================================
281
/// Owning iterator produced by calling `into_iter` on a [FileLoader].
pub struct IntoIter<'a, T> {
    /// The boxed iterator taken over from the loader.
    iterator: Box<dyn Iterator<Item = T> + 'a>,
}
285
286impl<'a, T> IntoIterator for FileLoader<'a, T> {
287    type Item = T;
288    type IntoIter = IntoIter<'a, T>;
289
290    fn into_iter(self) -> Self::IntoIter {
291        IntoIter {
292            iterator: self.iterator,
293        }
294    }
295}
296
297impl<T> Iterator for IntoIter<'_, T> {
298    type Item = T;
299
300    fn next(&mut self) -> Option<Self::Item> {
301        self.iterator.next()
302    }
303}
304
#[cfg(test)]
mod tests {
    use assert_fs::prelude::{FileTouch, FileWriteStr, PathChild};

    use super::FileLoader;

    /// Loading through a glob pattern yields the contents of every matching file.
    #[test]
    fn test_file_loader() {
        let temp = assert_fs::TempDir::new().expect("Failed to create temp dir");

        let foo_file = temp.child("foo.txt");
        foo_file.touch().expect("Failed to create foo.txt");
        foo_file.write_str("foo").expect("Failed to write to foo");

        let bar_file = temp.child("bar.txt");
        bar_file.touch().expect("Failed to create bar.txt");
        bar_file.write_str("bar").expect("Failed to write to bar");

        let pattern = format!("{}/*.txt", temp.path().to_string_lossy());

        let mut actual: Vec<String> = FileLoader::with_glob(&pattern)
            .unwrap()
            .ignore_errors()
            .read()
            .ignore_errors()
            .into_iter()
            .collect();
        actual.sort();

        let mut expected = vec!["foo".to_string(), "bar".to_string()];
        expected.sort();

        assert!(!actual.is_empty());
        assert_eq!(expected, actual);
    }

    /// Loading from in-memory byte buffers yields the decoded text of every buffer.
    #[test]
    fn test_file_loader_bytes() {
        let temp = assert_fs::TempDir::new().expect("Failed to create temp dir");

        let foo_file = temp.child("foo.txt");
        foo_file.touch().expect("Failed to create foo.txt");
        foo_file.write_str("foo").expect("Failed to write to foo");

        let bar_file = temp.child("bar.txt");
        bar_file.touch().expect("Failed to create bar.txt");
        bar_file.write_str("bar").expect("Failed to write to bar");

        let buffers = vec![
            std::fs::read(foo_file.path()).unwrap(),
            std::fs::read(bar_file.path()).unwrap(),
        ];

        let mut actual: Vec<String> = FileLoader::from_bytes_multi(buffers)
            .read()
            .ignore_errors()
            .into_iter()
            .collect();
        actual.sort();

        let mut expected = vec!["foo".to_string(), "bar".to_string()];
        expected.sort();

        assert!(!actual.is_empty());
        assert_eq!(expected, actual);
    }
}