pub struct Crawler<A, C> { /* private fields */ }Expand description
The core of this library.
Create one with Crawler::new or Crawler::new_async
to get started. Also see the examples.
Implementations§
Source§impl<C> Crawler<NonAsync, C>
impl<C> Crawler<NonAsync, C>
Sourcepub fn start_dir<P: AsRef<Path>>(self, path: P) -> Self
pub fn start_dir<P: AsRef<Path>>(self, path: P) -> Self
Sets the directory the crawler should start in. Defaults to the current directory, which is resolved when
Crawler::run is called; if that resolution fails, it panics before doing anything.
use std::collections::HashSet;
use std::path::PathBuf;
use std::sync::Mutex;
//Assuming that the content of C:\foo isn't changing during execution
//and this program is executed in that same folder
let crawler_1_result =
Crawler::new()
.start_dir("C:\\foo")
.context(Mutex::new(Vec::new()))
.run(|ctx: Mutex<Vec<String>>, path| {
ctx.lock().unwrap().insert(path.display());
});
let crawler_2_result =
Crawler::new()
.context(Mutex::new(Vec::new()))
.run(|ctx: Mutex<HashSet<String>>, path| {
ctx.lock().unwrap().insert(path.display());
})?;
//then (not guaranteed when using a Vec instead of a HashSet by design)
assert_eq!(crawler_1_result, crawler_2_result);Sourcepub fn file_regex<STR: AsRef<str>>(self, regex: STR) -> Self
pub fn file_regex<STR: AsRef<str>>(self, regex: STR) -> Self
Sourcepub fn folder_regex<STR: AsRef<str>>(self, regex: STR) -> Self
pub fn folder_regex<STR: AsRef<str>>(self, regex: STR) -> Self
Only goes into a folder if it matches the given regex (meaning that the files, subfolders etc. of a non-matching folder will not be traversed).
//given this folder structure:
//foo
// |--bar
// | |--foo.txt
// |--foobar
// | |---barbar
// | |---baz.txt
// |--foo
// |--baz.txt
//this prints *only* baz.txt because the regex matches "foo", but not "bar", "barbar" or "foobar"
Crawler::new()
.start_dir("path\\to\\foo")
.folder_regex("foo")
.run(|_, path| {
println!("{}", path.display());
})?;Sourcepub fn search_depth(self, depth: u32) -> Self
pub fn search_depth(self, depth: u32) -> Self
How deep (in terms of nested folder levels) the Crawler should go.
//prints all text files in the current directory, but not its subfolders
Crawler::new()
//replacing the 0 with a 1 means that it also traverses the subfolders, but not their subfolders
.search_depth(0)
.file_regex(r"^.*\.txt$")
.run(|_, path| {
println!("{}", path.display());
})?;Sourcepub fn context<CNEW: Send + Sync>(
self,
context: CNEW,
) -> Crawler<NonAsync, CNEW>
pub fn context<CNEW: Send + Sync>( self, context: CNEW, ) -> Crawler<NonAsync, CNEW>
Adds a context (i.e. a value that is passed to the closure on every invocation via an Arc) of type CNEW.
It is returned from the run function after execution.
Defaults to the zero-sized NoContext.
use std::sync::atomic::AtomicU16;
//bind the context to a variable
let result =
Crawler::new()
//adds a counter (for everything not representable with Atomics, a Mutex is recommended)
.context(AtomicU16::new(0))
.run(|_, path| {
println!("{}", path.display());
})?;
println!("{} files in the current directory")Sourcepub fn run<A, E>(self, action: A) -> Result<C, Box<dyn Error + Send + 'static>>
pub fn run<A, E>(self, action: A) -> Result<C, Box<dyn Error + Send + 'static>>
Runs the (modified) Crawler returned from Crawler::new, executing a closure that’s passed
a Context and the path of the file for every file in the specified directory. For exceptions,
see search_depth, file_regex and folder_regex.
use file_crawler::prelude::*;
use std::path::PathBuf;
Crawler::new()
.start_dir("C\\user\\foo")
.file_regex(r"^.*\.txt$")
.search_depth(3)
.run(|_, path| {
println!("{}", path.display());
Ok(())
})?;Source§impl<C> Crawler<Async, C>
impl<C> Crawler<Async, C>
Sourcepub fn file_regex<STR: AsRef<str>>(self, regex: STR) -> Self
pub fn file_regex<STR: AsRef<str>>(self, regex: STR) -> Self
See Crawler::file_regex.
Sourcepub fn folder_regex<STR: AsRef<str>>(self, regex: STR) -> Self
pub fn folder_regex<STR: AsRef<str>>(self, regex: STR) -> Self
Sourcepub fn search_depth(self, depth: u32) -> Self
pub fn search_depth(self, depth: u32) -> Self
Sourcepub fn context<CNEW: Send + Sync + 'static>(
self,
context: CNEW,
) -> Crawler<Async, CNEW>
pub fn context<CNEW: Send + Sync + 'static>( self, context: CNEW, ) -> Crawler<Async, CNEW>
See Crawler::context.
Sourcepub async fn run<Fun, Fut, E>(
self,
action: Fun,
) -> Result<C, Box<dyn Error + Send + 'static>>
pub async fn run<Fun, Fut, E>( self, action: Fun, ) -> Result<C, Box<dyn Error + Send + 'static>>
Runs a (modified) asynchronous file crawler from Crawler::new_async using tokio.
Requires a runtime with at least two threads (3).
Otherwise, the same as the synchronous version. It is recommended to use the exposed tokio (through the prelude) dependency instead of std when possible.
use file_crawler::prelude::*;
use std::path::PathBuf;
Crawler::new()
.start_dir("C\\user\\foo")
.file_regex(r"^.*\.txt$")
.run(|_, path| {
let contents=String::new();
let file=tokio::fs::File::open(&path).await?;
file.read_to_string(&mut contents).await?;
println!("{}:\n{}", path.display(), contents);
Ok(())
})?;Trait Implementations§
Auto Trait Implementations§
impl<A, C> Freeze for Crawler<A, C>where
C: Freeze,
impl<A, C> RefUnwindSafe for Crawler<A, C>where
C: RefUnwindSafe,
A: RefUnwindSafe,
impl<A, C> Send for Crawler<A, C>
impl<A, C> Sync for Crawler<A, C>
impl<A, C> Unpin for Crawler<A, C>
impl<A, C> UnwindSafe for Crawler<A, C>where
C: UnwindSafe,
A: UnwindSafe,
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read moreSource§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more