// swiftide_indexing/loaders/file_loader.rs
//! Load files from a directory
use anyhow::Context as _;
use std::path::{Path, PathBuf};
use swiftide_core::{indexing::IndexingStream, indexing::Node, Loader};
/// Loads files found under a path and turns each one into an indexing [`Node`].
///
/// The loader walks the configured path, optionally keeps only files whose
/// extension is in an allow-list, and hands the resulting files on for further
/// processing.
///
/// # Example
///
/// ```no_run
/// // Create a pipeline that loads the current directory
/// // and indexes all files with the ".rs" extension.
/// # use swiftide_indexing as indexing;
/// # use swiftide_indexing::loaders::FileLoader;
/// indexing::Pipeline::from_loader(FileLoader::new(".").with_extensions(&["rs"]));
/// ```
#[derive(Debug, Clone)]
pub struct FileLoader {
    /// Root path handed to the directory walker.
    pub(crate) path: PathBuf,
    /// Allowed file extensions, written without the leading dot.
    /// `None` means no extension filter has been configured.
    pub(crate) extensions: Option<Vec<String>>,
}
impl FileLoader {
    /// Creates a new `FileLoader` with the specified path.
    ///
    /// No extension filter is configured initially, so every file is accepted.
    ///
    /// # Arguments
    /// * `path` - The path to the directory to load files from.
    ///
    /// # Returns
    /// A new instance of `FileLoader`.
    pub fn new(path: impl Into<PathBuf>) -> Self {
        Self {
            path: path.into(),
            extensions: None,
        }
    }

    /// Adds extensions to the loader.
    ///
    /// The given extensions are appended to any that were added earlier.
    /// Matching is an exact, case-sensitive string comparison against the
    /// file's extension.
    ///
    /// # Arguments
    /// * `extensions` - A list of extensions to add without the leading dot.
    ///
    /// # Returns
    /// The `FileLoader` instance with the added extensions.
    #[must_use]
    pub fn with_extensions(mut self, extensions: &[impl AsRef<str>]) -> Self {
        // Extend in place instead of rebuilding the vector. An empty slice still
        // turns `None` into `Some(vec![])`, exactly as before.
        self.extensions
            .get_or_insert_with(Vec::new)
            .extend(extensions.iter().map(|ext| ext.as_ref().to_owned()));
        self
    }

    /// Lists the nodes (files) that match the specified extensions.
    ///
    /// Entries the directory walker cannot produce (for example because the
    /// root path does not exist) are logged at `warn` level and skipped.
    ///
    /// # Returns
    /// A vector of `Node` representing the matching files.
    ///
    /// # Panics
    /// This method will panic if it fails to read a file's content as a UTF-8
    /// string, or if the node cannot be built. The panic message for a failed
    /// read names the offending path and the underlying error.
    pub fn list_nodes(&self) -> Vec<Node> {
        ignore::Walk::new(&self.path)
            .filter_map(|entry| match entry {
                Ok(entry) => Some(entry),
                Err(err) => {
                    // Still best-effort, but no longer silent.
                    tracing::warn!("Skipping entry that could not be walked: {}", err);
                    None
                }
            })
            .filter(|entry| entry.file_type().is_some_and(|ft| ft.is_file()))
            .filter(move |entry| self.file_has_extension(entry.path()))
            .map(ignore::DirEntry::into_path)
            .map(|path| {
                tracing::debug!("Reading file: {:?}", path);
                // The signature cannot carry an error, so the documented panic is
                // kept; it now says which file failed and why.
                let content = std::fs::read_to_string(&path).unwrap_or_else(|err| {
                    panic!("Failed to read file {}: {}", path.display(), err)
                });
                let original_size = content.len();
                Node::builder()
                    .path(path)
                    .chunk(content)
                    .original_size(original_size)
                    .build()
                    .expect("Failed to build node")
            })
            .collect()
    }

    // Helper function to check if a file has one of the configured extensions.
    // If no extensions are specified, this function will return true.
    // If the file has no extension, this function will return false.
    fn file_has_extension(&self, path: &Path) -> bool {
        let Some(allowed) = self.extensions.as_ref() else {
            return true;
        };
        let Some(ext) = path.extension() else {
            return false;
        };
        // Convert once, not once per candidate extension.
        let lossy = ext.to_string_lossy();
        let wanted: &str = &lossy;
        allowed.iter().any(|candidate| candidate == wanted)
    }
}
impl Loader for FileLoader {
    /// Converts the `FileLoader` into a stream of `Node`.
    ///
    /// Walks the configured path, keeps regular files that pass the extension
    /// filter, and produces one item per file. Entries the directory walker
    /// cannot produce are logged at `warn` level and skipped.
    ///
    /// # Returns
    /// An `IndexingStream` representing the stream of files.
    ///
    /// # Errors
    /// This method itself does not return an error. A file whose content cannot
    /// be read as a UTF-8 string is yielded as an error item whose context
    /// names the file's path; a failing node build is yielded as an error item
    /// as well.
    fn into_stream(self) -> IndexingStream {
        let files = ignore::Walk::new(&self.path)
            .filter_map(|entry| match entry {
                Ok(entry) => Some(entry),
                Err(err) => {
                    // Still best-effort, but no longer silent.
                    tracing::warn!("Skipping entry that could not be walked: {}", err);
                    None
                }
            })
            .filter(|entry| entry.file_type().is_some_and(|ft| ft.is_file()))
            .filter(move |entry| self.file_has_extension(entry.path()))
            .map(|entry| {
                let path = entry.path();
                tracing::debug!("Reading file: {:?}", path);
                // Attach the path so a failing pipeline shows which file broke.
                let content = std::fs::read_to_string(path)
                    .with_context(|| format!("Failed to read file {}", path.display()))?;
                let original_size = content.len();
                Node::builder()
                    .path(path)
                    .chunk(content)
                    .original_size(original_size)
                    .build()
            });
        IndexingStream::iter(files)
    }

    /// Boxed variant of [`Loader::into_stream`]; delegates to `into_stream`.
    fn into_stream_boxed(self: Box<Self>) -> IndexingStream {
        self.into_stream()
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// A single extension passed to `with_extensions` ends up stored as the
    /// loader's only allowed extension.
    #[test]
    fn test_with_extensions() {
        let expected = Some(vec![String::from("rs")]);
        let loader = FileLoader::new("/tmp").with_extensions(&["rs"]);
        assert_eq!(loader.extensions, expected);
    }
}