rig/loaders/
file.rs

1use std::{fs, path::PathBuf, string::FromUtf8Error};
2
3use glob::glob;
4use thiserror::Error;
5
6#[derive(Error, Debug)]
7pub enum FileLoaderError {
8    #[error("Invalid glob pattern: {0}")]
9    InvalidGlobPattern(String),
10
11    #[error("IO error: {0}")]
12    IoError(#[from] std::io::Error),
13
14    #[error("Pattern error: {0}")]
15    PatternError(#[from] glob::PatternError),
16
17    #[error("Glob error: {0}")]
18    GlobError(#[from] glob::GlobError),
19
20    #[error("String conversion error: {0}")]
21    StringUtf8Error(#[from] FromUtf8Error),
22}
23
24// ================================================================
25// Implementing Readable trait for reading file contents
26// ================================================================
27pub(crate) trait Readable {
28    fn read(self) -> Result<String, FileLoaderError>;
29    fn read_with_path(self) -> Result<(PathBuf, String), FileLoaderError>;
30}
31
32impl<'a> FileLoader<'a, PathBuf> {
33    pub fn read(self) -> FileLoader<'a, Result<String, FileLoaderError>> {
34        FileLoader {
35            iterator: Box::new(self.iterator.map(|res| res.read())),
36        }
37    }
38    pub fn read_with_path(self) -> FileLoader<'a, Result<(PathBuf, String), FileLoaderError>> {
39        FileLoader {
40            iterator: Box::new(self.iterator.map(|res| res.read_with_path())),
41        }
42    }
43}
44
45impl Readable for PathBuf {
46    fn read(self) -> Result<String, FileLoaderError> {
47        fs::read_to_string(self).map_err(FileLoaderError::IoError)
48    }
49    fn read_with_path(self) -> Result<(PathBuf, String), FileLoaderError> {
50        let contents = fs::read_to_string(&self);
51        Ok((self, contents?))
52    }
53}
54
55impl Readable for Vec<u8> {
56    fn read(self) -> Result<String, FileLoaderError> {
57        Ok(String::from_utf8(self)?)
58    }
59
60    fn read_with_path(self) -> Result<(PathBuf, String), FileLoaderError> {
61        let res = String::from_utf8(self)?;
62
63        Ok((PathBuf::from("<memory>"), res))
64    }
65}
66
67impl<T: Readable> Readable for Result<T, FileLoaderError> {
68    fn read(self) -> Result<String, FileLoaderError> {
69        self.map(|t| t.read())?
70    }
71    fn read_with_path(self) -> Result<(PathBuf, String), FileLoaderError> {
72        self.map(|t| t.read_with_path())?
73    }
74}
75
76// ================================================================
77// FileLoader definitions and implementations
78// ================================================================
79
/// [FileLoader] is a utility for loading files from the filesystem using glob patterns or directory
///  paths. It provides methods to read file contents and handle errors gracefully.
///
/// # Errors
///
/// This module defines a custom error type [FileLoaderError] which can represent various errors
///  that might occur during file loading operations, such as invalid glob patterns, IO errors, and
///  glob errors.
///
/// # Example Usage
///
/// ```rust
/// use rig::loaders::FileLoader;
///
/// fn main() -> Result<(), Box<dyn std::error::Error>> {
///     // Create a FileLoader using a glob pattern
///     let loader = FileLoader::with_glob("path/to/files/*.txt")?;
///
///     // Read file contents, ignoring any errors
///     let contents: Vec<String> = loader
///         .read()
///         .ignore_errors()
///         .into_iter()
///         .collect();
///
///     for content in contents {
///         println!("{}", content);
///     }
///
///     Ok(())
/// }
/// ```
///
/// [FileLoader] uses strict typing between the iterator methods to ensure that transitions between
///   different implementations of the loaders and its methods are handled properly by the compiler.
pub struct FileLoader<'a, T> {
    /// Boxed iterator producing the loader's items; every adapter method wraps it in a new
    /// boxed iterator with a different item type.
    iterator: Box<dyn Iterator<Item = T> + 'a>,
}
116
117impl<'a> FileLoader<'a, Result<PathBuf, FileLoaderError>> {
118    /// Reads the contents of the files within the iterator returned by [FileLoader::with_glob] or
119    ///  [FileLoader::with_dir].
120    ///
121    /// # Example
122    /// Read files in directory "files/*.txt" and print the content for each file
123    ///
124    /// ```rust
125    /// let content = FileLoader::with_glob(...)?.read();
126    /// for result in content {
127    ///     match result {
128    ///         Ok(content) => println!("{}", content),
129    ///         Err(e) => eprintln!("Error reading file: {}", e),
130    ///     }
131    /// }
132    /// ```
133    pub fn read(self) -> FileLoader<'a, Result<String, FileLoaderError>> {
134        FileLoader {
135            iterator: Box::new(self.iterator.map(|res| res.read())),
136        }
137    }
138    /// Reads the contents of the files within the iterator returned by [FileLoader::with_glob] or
139    ///  [FileLoader::with_dir] and returns the path along with the content.
140    ///
141    /// # Example
142    /// Read files in directory "files/*.txt" and print the content for corresponding path for each
143    ///  file.
144    ///
145    /// ```rust
146    /// let content = FileLoader::with_glob("files/*.txt")?.read();
147    /// for (path, result) in content {
148    ///     match result {
149    ///         Ok((path, content)) => println!("{:?} {}", path, content),
150    ///         Err(e) => eprintln!("Error reading file: {}", e),
151    ///     }
152    /// }
153    /// ```
154    pub fn read_with_path(self) -> FileLoader<'a, Result<(PathBuf, String), FileLoaderError>> {
155        FileLoader {
156            iterator: Box::new(self.iterator.map(|res| res.read_with_path())),
157        }
158    }
159}
160
161impl<'a, T: 'a> FileLoader<'a, Result<T, FileLoaderError>> {
162    /// Ignores errors in the iterator, returning only successful results. This can be used on any
163    ///  [FileLoader] state of iterator whose items are results.
164    ///
165    /// # Example
166    /// Read files in directory "files/*.txt" and ignore errors from unreadable files.
167    ///
168    /// ```rust
169    /// let content = FileLoader::with_glob("files/*.txt")?.read().ignore_errors();
170    /// for result in content {
171    ///     println!("{}", content)
172    /// }
173    /// ```
174    pub fn ignore_errors(self) -> FileLoader<'a, T> {
175        FileLoader {
176            iterator: Box::new(self.iterator.filter_map(|res| res.ok())),
177        }
178    }
179}
180
181impl FileLoader<'_, Result<PathBuf, FileLoaderError>> {
182    /// Creates a new [FileLoader] using a glob pattern to match files.
183    ///
184    /// # Example
185    /// Create a [FileLoader] for all `.txt` files that match the glob "files/*.txt".
186    ///
187    /// ```rust
188    /// let loader = FileLoader::with_glob("files/*.txt")?;
189    /// ```
190    pub fn with_glob(
191        pattern: &str,
192    ) -> Result<FileLoader<Result<PathBuf, FileLoaderError>>, FileLoaderError> {
193        let paths = glob(pattern)?;
194        Ok(FileLoader {
195            iterator: Box::new(
196                paths
197                    .into_iter()
198                    .map(|path| path.map_err(FileLoaderError::GlobError)),
199            ),
200        })
201    }
202
203    /// Creates a new [FileLoader] on all files within a directory.
204    ///
205    /// # Example
206    /// Create a [FileLoader] for all files that are in the directory "files" (ignores subdirectories).
207    ///
208    /// ```rust
209    /// let loader = FileLoader::with_dir("files")?;
210    /// ```
211    pub fn with_dir(
212        directory: &str,
213    ) -> Result<FileLoader<Result<PathBuf, FileLoaderError>>, FileLoaderError> {
214        Ok(FileLoader {
215            iterator: Box::new(fs::read_dir(directory)?.filter_map(|entry| {
216                let path = entry.ok()?.path();
217                if path.is_file() { Some(Ok(path)) } else { None }
218            })),
219        })
220    }
221}
222
223impl<'a> FileLoader<'a, Vec<u8>> {
224    /// Ingest a  as a byte array.
225    pub fn from_bytes(bytes: Vec<u8>) -> FileLoader<'a, Vec<u8>> {
226        FileLoader {
227            iterator: Box::new(vec![bytes].into_iter()),
228        }
229    }
230
231    /// Ingest multiple byte arrays.
232    pub fn from_bytes_multi(bytes_vec: Vec<Vec<u8>>) -> FileLoader<'a, Vec<u8>> {
233        FileLoader {
234            iterator: Box::new(bytes_vec.into_iter()),
235        }
236    }
237
238    /// Use this once you've created the loader to load the document in.
239    pub fn read(self) -> FileLoader<'a, Result<String, FileLoaderError>> {
240        FileLoader {
241            iterator: Box::new(self.iterator.map(|res| res.read())),
242        }
243    }
244
245    /// Use this once you've created the reader to load the document in (and get the path).
246    pub fn read_with_path(self) -> FileLoader<'a, Result<(PathBuf, String), FileLoaderError>> {
247        FileLoader {
248            iterator: Box::new(self.iterator.map(|res| res.read_with_path())),
249        }
250    }
251}
252
253// ================================================================
254// Iterators for FileLoader
255// ================================================================
256
/// Owning iterator over the items of a [FileLoader].
///
/// It holds the same boxed iterator the loader was built around and yields its items.
pub struct IntoIter<'a, T> {
    /// The boxed iterator that items are pulled from.
    iterator: Box<dyn Iterator<Item = T> + 'a>,
}
260
261impl<'a, T> IntoIterator for FileLoader<'a, T> {
262    type Item = T;
263    type IntoIter = IntoIter<'a, T>;
264
265    fn into_iter(self) -> Self::IntoIter {
266        IntoIter {
267            iterator: self.iterator,
268        }
269    }
270}
271
272impl<T> Iterator for IntoIter<'_, T> {
273    type Item = T;
274
275    fn next(&mut self) -> Option<Self::Item> {
276        self.iterator.next()
277    }
278}
279
#[cfg(test)]
mod tests {
    use assert_fs::prelude::{FileTouch, FileWriteStr, PathChild};

    use super::FileLoader;

    /// Loads two temporary `.txt` files through a glob pattern and checks that both contents
    /// come back.
    #[test]
    fn test_file_loader() {
        let temp = assert_fs::TempDir::new().expect("Failed to create temp dir");
        let foo_file = temp.child("foo.txt");
        let bar_file = temp.child("bar.txt");

        foo_file.touch().expect("Failed to create foo.txt");
        bar_file.touch().expect("Failed to create bar.txt");

        foo_file.write_str("foo").expect("Failed to write to foo");
        bar_file.write_str("bar").expect("Failed to write to bar");

        // Build the pattern with `Path::join` so the separator matches the platform.
        let pattern = temp.path().join("*.txt");
        let glob = pattern.to_string_lossy();

        let loader = FileLoader::with_glob(&glob).unwrap();
        let mut actual = loader
            .ignore_errors()
            .read()
            .ignore_errors()
            .into_iter()
            .collect::<Vec<_>>();
        let mut expected = vec!["foo".to_string(), "bar".to_string()];

        // The order in which files are yielded is not asserted, so compare sorted lists.
        actual.sort();
        expected.sort();

        // `assert_eq!` prints both sides on failure, unlike `assert!(a == b)`.
        assert_eq!(expected, actual);
    }

    /// Feeds the raw bytes of two files through the in-memory loader and checks that both
    /// decode to the expected text.
    #[test]
    fn test_file_loader_bytes() {
        let temp = assert_fs::TempDir::new().expect("Failed to create temp dir");
        let foo_file = temp.child("foo.txt");
        let bar_file = temp.child("bar.txt");

        foo_file.touch().expect("Failed to create foo.txt");
        bar_file.touch().expect("Failed to create bar.txt");

        foo_file.write_str("foo").expect("Failed to write to foo");
        bar_file.write_str("bar").expect("Failed to write to bar");

        let foo_bytes = std::fs::read(foo_file.path()).unwrap();
        let bar_bytes = std::fs::read(bar_file.path()).unwrap();

        let loader = FileLoader::from_bytes_multi(vec![foo_bytes, bar_bytes]);
        let mut actual = loader
            .read()
            .ignore_errors()
            .into_iter()
            .collect::<Vec<_>>();
        let mut expected = vec!["foo".to_string(), "bar".to_string()];

        actual.sort();
        expected.sort();

        assert_eq!(expected, actual);
    }
}
345}