rig/loaders/
file.rs

1use std::{fs, path::PathBuf, string::FromUtf8Error};
2
3use glob::glob;
4use thiserror::Error;
5
6#[derive(Error, Debug)]
7pub enum FileLoaderError {
8    #[error("Invalid glob pattern: {0}")]
9    InvalidGlobPattern(String),
10
11    #[error("IO error: {0}")]
12    IoError(#[from] std::io::Error),
13
14    #[error("Pattern error: {0}")]
15    PatternError(#[from] glob::PatternError),
16
17    #[error("Glob error: {0}")]
18    GlobError(#[from] glob::GlobError),
19
20    #[error("String conversion error: {0}")]
21    StringUtf8Error(#[from] FromUtf8Error),
22}
23
24// ================================================================
25// Implementing Readable trait for reading file contents
26// ================================================================
27pub(crate) trait Readable {
28    fn read(self) -> Result<String, FileLoaderError>;
29    fn read_with_path(self) -> Result<(PathBuf, String), FileLoaderError>;
30}
31
32impl<'a> FileLoader<'a, PathBuf> {
33    pub fn read(self) -> FileLoader<'a, Result<String, FileLoaderError>> {
34        FileLoader {
35            iterator: Box::new(self.iterator.map(|res| res.read())),
36        }
37    }
38    pub fn read_with_path(self) -> FileLoader<'a, Result<(PathBuf, String), FileLoaderError>> {
39        FileLoader {
40            iterator: Box::new(self.iterator.map(|res| res.read_with_path())),
41        }
42    }
43}
44
45impl Readable for PathBuf {
46    fn read(self) -> Result<String, FileLoaderError> {
47        fs::read_to_string(self).map_err(FileLoaderError::IoError)
48    }
49    fn read_with_path(self) -> Result<(PathBuf, String), FileLoaderError> {
50        let contents = fs::read_to_string(&self);
51        Ok((self, contents?))
52    }
53}
54
55impl Readable for Vec<u8> {
56    fn read(self) -> Result<String, FileLoaderError> {
57        Ok(String::from_utf8(self)?)
58    }
59
60    fn read_with_path(self) -> Result<(PathBuf, String), FileLoaderError> {
61        let res = String::from_utf8(self)?;
62
63        Ok((PathBuf::from("<memory>"), res))
64    }
65}
66
67impl<T: Readable> Readable for Result<T, FileLoaderError> {
68    fn read(self) -> Result<String, FileLoaderError> {
69        self.map(|t| t.read())?
70    }
71    fn read_with_path(self) -> Result<(PathBuf, String), FileLoaderError> {
72        self.map(|t| t.read_with_path())?
73    }
74}
75
76// ================================================================
77// FileLoader definitions and implementations
78// ================================================================
79
/// [FileLoader] is a utility for loading files from the filesystem using glob patterns or directory
///  paths. It provides methods to read file contents and handle errors gracefully.
///
/// # Errors
///
/// This module defines a custom error type [FileLoaderError] which can represent various errors
///  that might occur during file loading operations, such as invalid glob patterns, IO errors, and
///  glob errors.
///
/// # Example Usage
///
/// ```rust
/// use rig::loaders::FileLoader;
///
/// fn main() -> Result<(), Box<dyn std::error::Error>> {
///     // Create a FileLoader using a glob pattern
///     let loader = FileLoader::with_glob("path/to/files/*.txt")?;
///
///     // Read file contents, ignoring any errors
///     let contents: Vec<String> = loader
///         .read()
///         .ignore_errors()
///         .into_iter()
///         .collect();
///
///     for content in contents {
///         println!("{}", content);
///     }
///
///     Ok(())
/// }
/// ```
///
/// [FileLoader] uses strict typing between the iterator methods to ensure that transitions between
///   different implementations of the loaders and its methods are handled properly by the compiler.
pub struct FileLoader<'a, T> {
    /// Boxed, lazily evaluated stream of items; every adapter method wraps it in a further
    /// iterator adapter and re-boxes the result.
    iterator: Box<dyn Iterator<Item = T> + 'a>,
}
116
117impl<'a> FileLoader<'a, Result<PathBuf, FileLoaderError>> {
118    /// Reads the contents of the files within the iterator returned by [FileLoader::with_glob] or
119    ///  [FileLoader::with_dir].
120    ///
121    /// # Example
122    /// Read files in directory "files/*.txt" and print the content for each file
123    ///
124    /// ```rust
125    /// let content = FileLoader::with_glob(...)?.read();
126    /// for result in content {
127    ///     match result {
128    ///         Ok(content) => println!("{}", content),
129    ///         Err(e) => eprintln!("Error reading file: {}", e),
130    ///     }
131    /// }
132    /// ```
133    pub fn read(self) -> FileLoader<'a, Result<String, FileLoaderError>> {
134        FileLoader {
135            iterator: Box::new(self.iterator.map(|res| res.read())),
136        }
137    }
138    /// Reads the contents of the files within the iterator returned by [FileLoader::with_glob] or
139    ///  [FileLoader::with_dir] and returns the path along with the content.
140    ///
141    /// # Example
142    /// Read files in directory "files/*.txt" and print the content for corresponding path for each
143    ///  file.
144    ///
145    /// ```rust
146    /// let content = FileLoader::with_glob("files/*.txt")?.read();
147    /// for (path, result) in content {
148    ///     match result {
149    ///         Ok((path, content)) => println!("{:?} {}", path, content),
150    ///         Err(e) => eprintln!("Error reading file: {}", e),
151    ///     }
152    /// }
153    /// ```
154    pub fn read_with_path(self) -> FileLoader<'a, Result<(PathBuf, String), FileLoaderError>> {
155        FileLoader {
156            iterator: Box::new(self.iterator.map(|res| res.read_with_path())),
157        }
158    }
159}
160
161impl<'a, T> FileLoader<'a, Result<T, FileLoaderError>>
162where
163    T: 'a,
164{
165    /// Ignores errors in the iterator, returning only successful results. This can be used on any
166    ///  [FileLoader] state of iterator whose items are results.
167    ///
168    /// # Example
169    /// Read files in directory "files/*.txt" and ignore errors from unreadable files.
170    ///
171    /// ```rust
172    /// let content = FileLoader::with_glob("files/*.txt")?.read().ignore_errors();
173    /// for result in content {
174    ///     println!("{}", content)
175    /// }
176    /// ```
177    pub fn ignore_errors(self) -> FileLoader<'a, T> {
178        FileLoader {
179            iterator: Box::new(self.iterator.filter_map(|res| res.ok())),
180        }
181    }
182}
183
184impl FileLoader<'_, Result<PathBuf, FileLoaderError>> {
185    /// Creates a new [FileLoader] using a glob pattern to match files.
186    ///
187    /// # Example
188    /// Create a [FileLoader] for all `.txt` files that match the glob "files/*.txt".
189    ///
190    /// ```rust
191    /// let loader = FileLoader::with_glob("files/*.txt")?;
192    /// ```
193    pub fn with_glob(
194        pattern: &str,
195    ) -> Result<FileLoader<'_, Result<PathBuf, FileLoaderError>>, FileLoaderError> {
196        let paths = glob(pattern)?;
197        Ok(FileLoader {
198            iterator: Box::new(
199                paths
200                    .into_iter()
201                    .map(|path| path.map_err(FileLoaderError::GlobError)),
202            ),
203        })
204    }
205
206    /// Creates a new [FileLoader] on all files within a directory.
207    ///
208    /// # Example
209    /// Create a [FileLoader] for all files that are in the directory "files" (ignores subdirectories).
210    ///
211    /// ```rust
212    /// let loader = FileLoader::with_dir("files")?;
213    /// ```
214    pub fn with_dir(
215        directory: &str,
216    ) -> Result<FileLoader<'_, Result<PathBuf, FileLoaderError>>, FileLoaderError> {
217        Ok(FileLoader {
218            iterator: Box::new(fs::read_dir(directory)?.filter_map(|entry| {
219                let path = entry.ok()?.path();
220                if path.is_file() { Some(Ok(path)) } else { None }
221            })),
222        })
223    }
224}
225
226impl<'a> FileLoader<'a, Vec<u8>> {
227    /// Ingest a  as a byte array.
228    pub fn from_bytes(bytes: Vec<u8>) -> FileLoader<'a, Vec<u8>> {
229        FileLoader {
230            iterator: Box::new(vec![bytes].into_iter()),
231        }
232    }
233
234    /// Ingest multiple byte arrays.
235    pub fn from_bytes_multi(bytes_vec: Vec<Vec<u8>>) -> FileLoader<'a, Vec<u8>> {
236        FileLoader {
237            iterator: Box::new(bytes_vec.into_iter()),
238        }
239    }
240
241    /// Use this once you've created the loader to load the document in.
242    pub fn read(self) -> FileLoader<'a, Result<String, FileLoaderError>> {
243        FileLoader {
244            iterator: Box::new(self.iterator.map(|res| res.read())),
245        }
246    }
247
248    /// Use this once you've created the reader to load the document in (and get the path).
249    pub fn read_with_path(self) -> FileLoader<'a, Result<(PathBuf, String), FileLoaderError>> {
250        FileLoader {
251            iterator: Box::new(self.iterator.map(|res| res.read_with_path())),
252        }
253    }
254}
255
256// ================================================================
257// Iterators for FileLoader
258// ================================================================
259
/// Owning iterator obtained by calling `into_iter()` on a [FileLoader].
pub struct IntoIter<'a, T> {
    /// The boxed item stream taken over from the loader.
    iterator: Box<dyn Iterator<Item = T> + 'a>,
}
263
264impl<'a, T> IntoIterator for FileLoader<'a, T> {
265    type Item = T;
266    type IntoIter = IntoIter<'a, T>;
267
268    fn into_iter(self) -> Self::IntoIter {
269        IntoIter {
270            iterator: self.iterator,
271        }
272    }
273}
274
275impl<T> Iterator for IntoIter<'_, T> {
276    type Item = T;
277
278    fn next(&mut self) -> Option<Self::Item> {
279        self.iterator.next()
280    }
281}
282
#[cfg(test)]
mod tests {
    use assert_fs::prelude::{FileTouch, FileWriteStr, PathChild};

    use super::FileLoader;

    /// Globbing a temporary directory and reading the matches yields every file's contents.
    #[test]
    fn test_file_loader() {
        let temp = assert_fs::TempDir::new().expect("Failed to create temp dir");
        let foo_file = temp.child("foo.txt");
        let bar_file = temp.child("bar.txt");

        foo_file.touch().expect("Failed to create foo.txt");
        bar_file.touch().expect("Failed to create bar.txt");

        foo_file.write_str("foo").expect("Failed to write to foo");
        bar_file.write_str("bar").expect("Failed to write to bar");

        // Build the pattern with `Path::join` rather than concatenating strings.
        let glob = temp.path().join("*.txt");
        let glob = glob.to_string_lossy();

        let loader = FileLoader::with_glob(&glob).unwrap();
        let mut actual = loader
            .ignore_errors()
            .read()
            .ignore_errors()
            .into_iter()
            .collect::<Vec<_>>();
        let mut expected = vec!["foo".to_string(), "bar".to_string()];

        // Glob order is not part of the contract under test, so compare sorted.
        actual.sort();
        expected.sort();

        // `assert_eq!` prints both sides on failure, unlike `assert!(a == b)`.
        assert_eq!(actual, expected);
    }

    /// Loading from in-memory byte buffers yields the decoded text of every buffer.
    #[test]
    fn test_file_loader_bytes() {
        let temp = assert_fs::TempDir::new().expect("Failed to create temp dir");
        let foo_file = temp.child("foo.txt");
        let bar_file = temp.child("bar.txt");

        foo_file.touch().expect("Failed to create foo.txt");
        bar_file.touch().expect("Failed to create bar.txt");

        foo_file.write_str("foo").expect("Failed to write to foo");
        bar_file.write_str("bar").expect("Failed to write to bar");

        let foo_bytes = std::fs::read(foo_file.path()).unwrap();
        let bar_bytes = std::fs::read(bar_file.path()).unwrap();

        let loader = FileLoader::from_bytes_multi(vec![foo_bytes, bar_bytes]);
        let mut actual = loader
            .read()
            .ignore_errors()
            .into_iter()
            .collect::<Vec<_>>();
        let mut expected = vec!["foo".to_string(), "bar".to_string()];

        actual.sort();
        expected.sort();

        assert_eq!(actual, expected);
    }
}