swiftide_indexing/loaders/
file_loader.rs

1//! Load files from a directory
2use std::{
3    io::Read as _,
4    path::{Path, PathBuf},
5};
6
7use anyhow::Context as _;
8use swiftide_core::{indexing::IndexingStream, indexing::Node, Loader};
9
/// The `FileLoader` struct is responsible for loading files from a specified directory,
/// filtering them based on their extensions, and creating a stream of these files for further
/// processing.
///
/// # Example
///
/// ```no_run
/// // Create a pipeline that loads the current directory
/// // and indexes all files with the ".rs" extension.
/// # use swiftide_indexing as indexing;
/// # use swiftide_indexing::loaders::FileLoader;
/// indexing::Pipeline::from_loader(FileLoader::new(".").with_extensions(&["rs"]));
/// ```
#[derive(Clone, Debug)]
pub struct FileLoader {
    /// Root path that is handed to the directory walker when iterating.
    pub(crate) path: PathBuf,
    /// Extensions (without the leading dot) a file must have to be loaded.
    /// `None` means no extension filter is applied.
    pub(crate) extensions: Option<Vec<String>>,
}
28
29impl FileLoader {
30    /// Creates a new `FileLoader` with the specified path.
31    ///
32    /// # Arguments
33    /// * `path` - The path to the directory to load files from.
34    ///
35    /// # Returns
36    /// A new instance of `FileLoader`.
37    pub fn new(path: impl Into<PathBuf>) -> Self {
38        Self {
39            path: path.into(),
40            extensions: None,
41        }
42    }
43
44    /// Adds extensions to the loader.
45    ///
46    /// # Arguments
47    /// * `extensions` - A list of extensions to add without the leading dot.
48    ///
49    /// # Returns
50    /// The `FileLoader` instance with the added extensions.
51    #[must_use]
52    pub fn with_extensions(mut self, extensions: &[impl AsRef<str>]) -> Self {
53        self.extensions = Some(
54            self.extensions
55                .unwrap_or_default()
56                .into_iter()
57                .chain(extensions.iter().map(|ext| ext.as_ref().to_string()))
58                .collect(),
59        );
60        self
61    }
62
63    /// Lists the nodes (files) that match the specified extensions.
64    ///
65    /// # Returns
66    /// A vector of `Node` representing the matching files.
67    ///
68    /// # Panics
69    /// This method will panic if it fails to read a file's content.
70    pub fn list_nodes(&self) -> Vec<Node> {
71        self.iter().filter_map(Result::ok).collect()
72    }
73
74    /// Iterates over the files in the directory
75    pub fn iter(&self) -> impl Iterator<Item = anyhow::Result<Node>> {
76        let path = self.path.clone();
77        let extensions = self.extensions.clone();
78
79        ignore::Walk::new(path)
80            .filter_map(Result::ok)
81            .filter(|entry| entry.file_type().is_some_and(|ft| ft.is_file()))
82            .filter(move |entry| file_has_extension(extensions.as_deref(), entry.path()))
83            .map(move |entry| {
84                tracing::debug!("Reading file: {:?}", entry);
85
86                // Files might be invalid utf-8, so we need to read them as bytes and convert it
87                // lossy, as Swiftide (currently) works internally with strings.
88                let mut file = fs_err::File::open(entry.path()).context("Failed to open file")?;
89                let mut buf = vec![];
90                file.read_to_end(&mut buf).context("Failed to read file")?;
91                let content = String::from_utf8_lossy(&buf);
92
93                let original_size = content.len();
94
95                Node::builder()
96                    .path(entry.path())
97                    .chunk(content)
98                    .original_size(original_size)
99                    .build()
100            })
101    }
102}
103
/// Checks whether `path` has one of the given extensions.
///
/// * If `extensions` is `None`, no filter is active and this returns `true`.
/// * If the path has no extension, this returns `false`.
/// * Otherwise the path's extension (lossily converted to a string) must be exactly equal,
///   including case, to one of the entries in `extensions`.
fn file_has_extension(extensions: Option<&[impl AsRef<str>]>, path: &Path) -> bool {
    let Some(allowed) = extensions else {
        return true;
    };
    let Some(ext) = path.extension() else {
        return false;
    };

    // Convert once, outside the loop, rather than once per candidate.
    let ext = ext.to_string_lossy();
    allowed.iter().any(|candidate| candidate.as_ref() == &*ext)
}
116
117impl Loader for FileLoader {
118    /// Converts the `FileLoader` into a stream of `Node`.
119    ///
120    /// # Returns
121    /// An `IndexingStream` representing the stream of files.
122    ///
123    /// # Errors
124    /// This method will return an error if it fails to read a file's content.
125    fn into_stream(self) -> IndexingStream {
126        IndexingStream::iter(self.iter())
127    }
128
129    fn into_stream_boxed(self: Box<Self>) -> IndexingStream {
130        self.into_stream()
131    }
132}
133
#[cfg(test)]
mod test {

    use tokio_stream::StreamExt as _;

    use super::*;

    /// A fresh loader stores exactly the extensions passed to `with_extensions`.
    #[test]
    fn test_with_extensions() {
        let expected = Some(vec!["rs".to_string()]);

        let loader = FileLoader::new("/tmp").with_extensions(&["rs"]);

        assert_eq!(loader.extensions, expected);
    }

    /// A file containing only invalid UTF-8 bytes is still loaded; each invalid byte shows
    /// up as a replacement character in the chunk.
    #[tokio::test]
    async fn test_ignores_invalid_utf8() {
        let tempdir = temp_dir::TempDir::new().unwrap();
        fs_err::write(tempdir.child("invalid.txt"), [0x80, 0x80, 0x80]).unwrap();

        let nodes = FileLoader::new(tempdir.path())
            .with_extensions(&["txt"])
            .into_stream()
            .collect::<Vec<_>>()
            .await;

        assert_eq!(nodes.len(), 1);

        let node = nodes.first().unwrap().as_ref().unwrap();
        assert_eq!(node.chunk, "���".to_string());
    }
}
162}