swiftide_indexing/loaders/
file_loader.rs1use std::{
3 io::Read as _,
4 path::{Path, PathBuf},
5};
6
7use anyhow::Context as _;
8use ignore::{DirEntry, Walk};
9use swiftide_core::{Loader, indexing::IndexingStream, indexing::TextNode};
10use tracing::{Span, debug_span, instrument};
11
/// Loads files found under a root path as [`TextNode`]s.
///
/// The set of loaded files can optionally be narrowed to a list of file extensions.
#[derive(Clone, Debug)]
pub struct FileLoader {
    /// Path at which the directory walk starts.
    pub(crate) root: PathBuf,
    /// Extensions to include; `None` means files are not filtered by extension.
    pub(crate) extensions: Option<Vec<String>>,
}
29
30impl FileLoader {
31 pub fn new(root: impl AsRef<Path>) -> Self {
41 Self {
42 root: root.as_ref().to_path_buf(),
43 extensions: None,
44 }
45 }
46
47 #[must_use]
57 pub fn with_extensions(mut self, extensions: &[impl AsRef<str>]) -> Self {
58 let existing = self.extensions.get_or_insert_default();
59 existing.extend(extensions.iter().map(|ext| ext.as_ref().to_string()));
60 self
61 }
62
63 pub fn list_nodes(&self) -> Vec<TextNode> {
73 self.iter().filter_map(Result::ok).collect()
74 }
75
76 pub fn iter(&self) -> impl Iterator<Item = anyhow::Result<TextNode>> + use<> {
78 Iter::new(&self.root, self.extensions.clone()).fuse()
79 }
80}
81
/// Iterator that walks a directory tree and yields one [`TextNode`] result per loaded file.
struct Iter {
    /// Directory walker that produces the entries to consider.
    walk: Walk,
    /// When `Some`, only files whose extension appears in this list are loaded.
    include_extensions: Option<Vec<String>>,
    /// Tracing span that is entered for the duration of each `next` call.
    span: Span,
}
93
94impl Iterator for Iter {
95 type Item = anyhow::Result<TextNode>;
96
97 fn next(&mut self) -> Option<Self::Item> {
98 let _span = self.span.enter();
99 loop {
100 let entry = self.walk.next()?;
102
103 let entry = match entry {
105 Ok(entry) => entry,
106 Err(err) => return Some(Err(err.into())),
107 };
108
109 if let Some(node) = self.load(&entry) {
110 return Some(node);
111 }
112 }
113 }
114}
115
116impl Iter {
117 fn new(root: &Path, include_extensions: Option<Vec<String>>) -> Self {
119 let span = debug_span!("file_loader", root = %root.display());
120 tracing::debug!(parent: &span, extensions = ?include_extensions, "Loading files");
121 Self {
122 walk: Walk::new(root),
123 include_extensions,
124 span,
125 }
126 }
127
128 #[instrument(skip_all, fields(path = %entry.path().display()))]
129 fn load(&self, entry: &DirEntry) -> Option<anyhow::Result<TextNode>> {
130 if entry.file_type().is_some_and(|ft| !ft.is_file()) {
131 return None;
133 }
134 if let Some(extensions) = &self.include_extensions {
135 let Some(extension) = entry.path().extension() else {
136 tracing::trace!("Skipping file without extension");
137 return None;
138 };
139 let extension = extension.to_string_lossy();
140 if !extensions.iter().any(|ext| ext == &extension) {
141 tracing::trace!("Skipping file with extension {extension}");
142 return None;
143 }
144 }
145 tracing::debug!("Loading file");
146 match read_node(entry) {
147 Ok(node) => {
148 tracing::debug!(node_id = %node.id(), "Loaded file");
149 Some(Ok(node))
150 }
151 Err(err) => {
152 tracing::error!(error = %err, "Failed to load file");
153 Some(Err(err))
154 }
155 }
156 }
157}
158
159fn read_node(entry: &DirEntry) -> anyhow::Result<TextNode> {
160 let mut file = fs_err::File::open(entry.path()).context("Failed to open file")?;
163 let mut buf = vec![];
164 file.read_to_end(&mut buf).context("Failed to read file")?;
165 let content = String::from_utf8_lossy(&buf);
166
167 let original_size = content.len();
168
169 TextNode::builder()
170 .path(entry.path())
171 .chunk(content)
172 .original_size(original_size)
173 .build()
174}
175
176impl Loader for FileLoader {
177 type Output = String;
178
179 fn into_stream(self) -> IndexingStream<String> {
188 IndexingStream::iter(self.iter())
189 }
190
191 fn into_stream_boxed(self: Box<Self>) -> IndexingStream<String> {
192 self.into_stream()
193 }
194}
195
#[cfg(test)]
mod test {

    use tokio_stream::StreamExt as _;

    use super::*;

    /// A fresh loader stores exactly the extensions passed to `with_extensions`.
    #[test]
    fn test_with_extensions() {
        let expected = Some(vec!["rs".to_string()]);

        let loader = FileLoader::new("/tmp").with_extensions(&["rs"]);

        assert_eq!(loader.extensions, expected);
    }

    /// A file containing invalid UTF-8 is still loaded; each invalid byte shows up as a
    /// replacement character in the node's chunk.
    #[tokio::test]
    async fn test_ignores_invalid_utf8() {
        let tempdir = temp_dir::TempDir::new().unwrap();
        let invalid_file = tempdir.child("invalid.txt");
        fs_err::write(invalid_file, [0x80, 0x80, 0x80]).unwrap();

        let nodes = FileLoader::new(tempdir.path())
            .with_extensions(&["txt"])
            .into_stream()
            .collect::<Vec<_>>()
            .await;

        assert_eq!(nodes.len(), 1);

        let node = nodes.first().unwrap().as_ref().unwrap();
        assert_eq!(node.chunk, "���".to_string());
    }
}