1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
//! Load files from a directory
use anyhow::Context as _;
use std::path::{Path, PathBuf};
use swiftide_core::{indexing::IndexingStream, indexing::Node, Loader};
/// Loads files from a directory, optionally restricted to a set of file extensions.
///
/// A `FileLoader` only holds configuration: the root directory to walk and the
/// extension filter. The file system is not touched until the loader is asked
/// to list or stream its files.
#[derive(Debug, Clone)]
pub struct FileLoader {
    /// Root directory that is walked when looking for files.
    pub(crate) path: PathBuf,
    /// Extensions (without the leading dot) a file must have to be loaded.
    /// `None` means no filtering is applied.
    pub(crate) extensions: Option<Vec<String>>,
}
impl FileLoader {
    /// Creates a new `FileLoader` with the specified path.
    ///
    /// The loader starts without an extension filter, so every regular file
    /// found under `path` is eligible.
    ///
    /// # Arguments
    /// * `path` - The path to the directory to load files from.
    ///
    /// # Returns
    /// A new instance of `FileLoader`.
    pub fn new(path: impl Into<PathBuf>) -> Self {
        Self {
            path: path.into(),
            extensions: None,
        }
    }

    /// Adds extensions to the loader.
    ///
    /// Extensions accumulate across calls; previously added extensions are kept.
    /// Calling this with an empty slice on a fresh loader still enables the
    /// filter (with an empty list), which means no file will match.
    ///
    /// # Arguments
    /// * `extensions` - A list of extensions to add without the leading dot.
    ///
    /// # Returns
    /// The `FileLoader` instance with the added extensions.
    #[must_use]
    pub fn with_extensions(mut self, extensions: &[impl AsRef<str>]) -> Self {
        // Extend the existing list in place instead of rebuilding it.
        self.extensions
            .get_or_insert_with(Vec::new)
            .extend(extensions.iter().map(|ext| ext.as_ref().to_string()));
        self
    }

    /// Lists the nodes (files) that match the specified extensions.
    ///
    /// Walks the configured directory, keeps regular files that pass the
    /// extension filter, and eagerly reads each one into a `Node` whose
    /// `chunk` is the file content and whose `original_size` is the content
    /// length in bytes. Directory entries the walker fails to produce are
    /// skipped silently.
    ///
    /// # Returns
    /// A vector of `Node` representing the matching files.
    ///
    /// # Panics
    /// This method will panic if it fails to read a file's content (for
    /// example when the file is not valid UTF-8). The panic message names the
    /// offending file and the underlying error.
    pub fn list_nodes(&self) -> Vec<Node> {
        ignore::Walk::new(&self.path)
            .filter_map(Result::ok)
            .filter(|entry| entry.file_type().is_some_and(|ft| ft.is_file()))
            .filter(|entry| self.file_has_extension(entry.path()))
            .map(ignore::DirEntry::into_path)
            .map(|path| {
                tracing::debug!("Reading file: {:?}", path);
                // The signature cannot report errors, so keep the documented
                // panic but say which file failed and why.
                let content = std::fs::read_to_string(&path).unwrap_or_else(|err| {
                    panic!("Failed to read file {}: {}", path.display(), err)
                });
                let original_size = content.len();
                Node {
                    path,
                    chunk: content,
                    original_size,
                    ..Default::default()
                }
            })
            .collect()
    }

    /// Checks whether `path` passes the extension filter.
    ///
    /// * If no extensions are configured, every path passes.
    /// * If the path has no extension, it is rejected.
    /// * Otherwise the path's extension must equal one of the configured
    ///   extensions exactly (case-sensitive).
    fn file_has_extension(&self, path: &Path) -> bool {
        let Some(allowed) = self.extensions.as_deref() else {
            return true;
        };
        let Some(ext) = path.extension() else {
            return false;
        };
        // Compare as `OsStr` so no string is allocated per candidate and a
        // non-UTF-8 extension can never match through lossy replacement.
        allowed
            .iter()
            .any(|candidate| ext == std::ffi::OsStr::new(candidate))
    }
}
impl Loader for FileLoader {
    /// Converts the `FileLoader` into a stream of `Node`.
    ///
    /// Walks the configured directory, keeps regular files that pass the
    /// extension filter, and reads each one into a `Node` whose `chunk` is the
    /// file content and whose `original_size` is the content length in bytes.
    /// Directory entries the walker fails to produce are skipped silently.
    ///
    /// # Returns
    /// An `IndexingStream` representing the stream of files.
    ///
    /// # Errors
    /// The method itself does not fail. A file whose content cannot be read
    /// is yielded as an error item in the stream; the error context names the
    /// file that failed.
    fn into_stream(self) -> IndexingStream {
        let files = ignore::Walk::new(&self.path)
            .filter_map(Result::ok)
            .filter(|entry| entry.file_type().is_some_and(|ft| ft.is_file()))
            .filter(move |entry| self.file_has_extension(entry.path()))
            // Take ownership of the path so it can be moved into the node
            // without an extra copy.
            .map(ignore::DirEntry::into_path)
            .map(|path| {
                tracing::debug!("Reading file: {:?}", path);
                let content = std::fs::read_to_string(&path)
                    .with_context(|| format!("Failed to read file {}", path.display()))?;
                let original_size = content.len();
                Ok(Node {
                    path,
                    chunk: content,
                    original_size,
                    ..Default::default()
                })
            });
        IndexingStream::iter(files)
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// A single call stores exactly the given extensions.
    #[test]
    fn test_with_extensions() {
        let loader = FileLoader::new("/tmp").with_extensions(&["rs"]);
        assert_eq!(loader.extensions, Some(vec!["rs".to_string()]));
    }

    /// Repeated calls append to the existing list instead of replacing it.
    #[test]
    fn test_with_extensions_accumulates() {
        let loader = FileLoader::new("/tmp")
            .with_extensions(&["rs"])
            .with_extensions(&["md", "toml"]);
        assert_eq!(
            loader.extensions,
            Some(vec![
                "rs".to_string(),
                "md".to_string(),
                "toml".to_string()
            ])
        );
    }

    /// Without a filter every path passes, including one with no extension.
    #[test]
    fn test_file_has_extension_without_filter() {
        let loader = FileLoader::new("/tmp");
        assert!(loader.file_has_extension(std::path::Path::new("src/main.rs")));
        assert!(loader.file_has_extension(std::path::Path::new("Makefile")));
    }

    /// With a filter only paths with a configured extension pass.
    #[test]
    fn test_file_has_extension_with_filter() {
        let loader = FileLoader::new("/tmp").with_extensions(&["rs"]);
        assert!(loader.file_has_extension(std::path::Path::new("src/main.rs")));
        assert!(!loader.file_has_extension(std::path::Path::new("README.md")));
        assert!(!loader.file_has_extension(std::path::Path::new("Makefile")));
    }
}