use crate::loaders::file::FileLoaderError;
use epub::doc::EpubDoc;
use std::fs::File;
use std::io::BufReader;
use std::marker::PhantomData;
use std::path::PathBuf;
use super::RawTextProcessor;
use super::errors::EpubLoaderError;
use super::text_processors::TextProcessor;
/// A value that can be turned into an opened EPUB document.
///
/// Implemented in this file for [`PathBuf`] and for `Result<T, EpubLoaderError>`
/// where `T: Loadable`, which lets iterator items that may already carry an error
/// be loaded without unwrapping them first.
pub(crate) trait Loadable {
/// Consumes `self` and returns the opened document, or an [`EpubLoaderError`].
fn load(self) -> Result<EpubDoc<BufReader<File>>, EpubLoaderError>;
/// Like [`Loadable::load`], but returns the originating path alongside the document.
fn load_with_path(self) -> Result<(PathBuf, EpubDoc<BufReader<File>>), EpubLoaderError>;
}
/// Opens the EPUB document located at this path.
impl Loadable for PathBuf {
    /// Passes the path to `EpubDoc::new`, wrapping any failure in
    /// `EpubLoaderError::EpubError`.
    fn load(self) -> Result<EpubDoc<BufReader<File>>, EpubLoaderError> {
        match EpubDoc::new(self) {
            Ok(doc) => Ok(doc),
            Err(cause) => Err(EpubLoaderError::EpubError(cause)),
        }
    }

    /// Same as [`Loadable::load`], but hands the path back together with the
    /// opened document.
    fn load_with_path(self) -> Result<(PathBuf, EpubDoc<BufReader<File>>), EpubLoaderError> {
        // Open through a borrow so the owned path can still be returned afterwards.
        let doc = EpubDoc::new(&self).map_err(EpubLoaderError::EpubError)?;
        Ok((self, doc))
    }
}
/// Loads the wrapped value when it is `Ok`; an existing error is passed through
/// unchanged without attempting to load anything.
impl<T: Loadable> Loadable for Result<T, EpubLoaderError> {
    /// Delegates to `T::load` for `Ok`, returns the stored error for `Err`.
    fn load(self) -> Result<EpubDoc<BufReader<File>>, EpubLoaderError> {
        self.and_then(T::load)
    }

    /// Delegates to `T::load_with_path` for `Ok`, returns the stored error for `Err`.
    fn load_with_path(self) -> Result<(PathBuf, EpubDoc<BufReader<File>>), EpubLoaderError> {
        self.and_then(T::load_with_path)
    }
}
/// Pipeline over EPUB files, backed by a boxed iterator.
///
/// `T` is the item type currently produced by the pipeline; the methods defined
/// on the various `impl` blocks in this file consume the loader and return a new
/// one with a different `T`. `P` is the text processor type applied to chapter
/// text by the reading methods and defaults to `RawTextProcessor`.
pub struct EpubFileLoader<'a, T, P = RawTextProcessor> {
// The underlying item source; every pipeline step wraps the previous one.
iterator: Box<dyn Iterator<Item = T> + 'a>,
// Carries the processor type without storing a value of it.
_processor: PhantomData<P>,
}
/// Item type produced by `EpubFileLoader::load_with_path`: the source path paired
/// with the opened document, or the error that prevented loading.
type EpubLoaded = Result<(PathBuf, EpubDoc<BufReader<File>>), EpubLoaderError>;
impl<'a, P> EpubFileLoader<'a, Result<PathBuf, EpubLoaderError>, P> {
pub fn load(self) -> EpubFileLoader<'a, Result<EpubDoc<BufReader<File>>, EpubLoaderError>, P> {
EpubFileLoader {
iterator: Box::new(self.iterator.map(|res| res.load())),
_processor: PhantomData,
}
}
pub fn load_with_path(self) -> EpubFileLoader<'a, EpubLoaded, P> {
EpubFileLoader {
iterator: Box::new(self.iterator.map(|res| res.load_with_path())),
_processor: PhantomData,
}
}
}
impl<'a, P> EpubFileLoader<'a, Result<PathBuf, EpubLoaderError>, P>
where
    P: TextProcessor,
{
    /// Opens every path and concatenates the processed text of all its chapters
    /// into a single `String` per file.
    ///
    /// An item is `Err` when the incoming item was already an error, when the
    /// document cannot be opened, or when processing any chapter fails (the first
    /// chapter error ends the read of that file).
    pub fn read(self) -> EpubFileLoader<'a, Result<String, EpubLoaderError>, P> {
        EpubFileLoader {
            iterator: Box::new(self.iterator.map(|res| {
                res.load().and_then(|doc| {
                    // Collect straight into `Result<String, _>`: chapters are appended
                    // as they are produced and the first error short-circuits, without
                    // materialising an intermediate `Vec<String>`.
                    EpubChapterIterator::<P>::from(doc).collect::<Result<String, EpubLoaderError>>()
                })
            })),
            _processor: PhantomData,
        }
    }

    /// Like [`Self::read`], but each successful item is the pair of the source
    /// path and the concatenated chapter text.
    pub fn read_with_path(
        self,
    ) -> EpubFileLoader<'a, Result<(PathBuf, String), EpubLoaderError>, P> {
        EpubFileLoader {
            iterator: Box::new(self.iterator.map(|res| {
                res.load_with_path().and_then(|(path, doc)| {
                    // Same direct collection as in `read`; the path is attached only
                    // once the whole document was processed successfully.
                    EpubChapterIterator::<P>::from(doc)
                        .collect::<Result<String, EpubLoaderError>>()
                        .map(|content| (path, content))
                })
            })),
            _processor: PhantomData,
        }
    }
}
impl<'a, P> EpubFileLoader<'a, EpubDoc<BufReader<File>>, P>
where
    P: TextProcessor + 'a,
{
    /// Flattens the stream of documents into a stream of processed chapters.
    ///
    /// Every chapter of every document becomes its own item, in document order;
    /// a chapter whose processing fails is yielded as `Err`.
    pub fn by_chapter(self) -> EpubFileLoader<'a, Result<String, EpubLoaderError>, P> {
        let chapters = self
            .iterator
            .flat_map(|doc| EpubChapterIterator::<P>::new(doc));
        EpubFileLoader {
            iterator: Box::new(chapters),
            _processor: PhantomData,
        }
    }
}
/// Per-file chapter listing: the source path together with every chapter result,
/// each tagged with its position in the chapter iteration (starting at 0).
type ByChapter = (PathBuf, Vec<(usize, Result<String, EpubLoaderError>)>);
impl<'a, P: TextProcessor> EpubFileLoader<'a, (PathBuf, EpubDoc<BufReader<File>>), P> {
    /// Splits every document into its processed chapters while keeping the path.
    ///
    /// Each item becomes the path plus a vector of `(index, chapter result)`
    /// pairs; the chapters of one file are collected when that item is
    /// requested from the iterator.
    pub fn by_chapter(self) -> EpubFileLoader<'a, ByChapter, P> {
        let per_file = self.iterator.map(|(path, doc)| {
            let chapters: Vec<_> = EpubChapterIterator::<P>::from(doc).enumerate().collect();
            (path, chapters)
        });
        EpubFileLoader {
            iterator: Box::new(per_file),
            _processor: PhantomData,
        }
    }
}
impl<'a, P> EpubFileLoader<'a, ByChapter, P>
where
    P: TextProcessor,
{
    /// Drops every chapter that failed to process, keeping only successful text.
    ///
    /// Surviving chapters keep the index they were originally enumerated with,
    /// so the indices may have gaps where failed chapters were removed.
    pub fn ignore_errors(self) -> EpubFileLoader<'a, (PathBuf, Vec<(usize, String)>), P> {
        let cleaned = self.iterator.map(|(path, chapters)| {
            let successes: Vec<(usize, String)> = chapters
                .into_iter()
                .filter_map(|(idx, outcome)| match outcome {
                    Ok(text) => Some((idx, text)),
                    Err(_) => None,
                })
                .collect();
            (path, successes)
        });
        EpubFileLoader {
            iterator: Box::new(cleaned),
            _processor: PhantomData,
        }
    }
}
impl<'a, P, T: 'a> EpubFileLoader<'a, Result<T, EpubLoaderError>, P> {
    /// Silently discards every `Err` item and unwraps the `Ok` ones.
    pub fn ignore_errors(self) -> EpubFileLoader<'a, T, P> {
        let successes = self.iterator.filter_map(Result::ok);
        EpubFileLoader {
            iterator: Box::new(successes),
            _processor: PhantomData,
        }
    }
}
impl<P> EpubFileLoader<'_, Result<PathBuf, FileLoaderError>, P> {
    /// Creates a loader over every path matched by the glob `pattern`.
    ///
    /// # Errors
    ///
    /// Returns an error when `pattern` is rejected by `glob::glob`. Failures for
    /// individual matches are not returned here; they are yielded as `Err` items
    /// by the loader.
    pub fn with_glob(
        pattern: &str,
    ) -> Result<EpubFileLoader<'_, Result<PathBuf, EpubLoaderError>, P>, EpubLoaderError> {
        let matches = glob::glob(pattern).map_err(FileLoaderError::PatternError)?;
        let paths = matches.map(|entry| {
            entry.map_err(|err| EpubLoaderError::FileLoaderError(FileLoaderError::GlobError(err)))
        });
        Ok(EpubFileLoader {
            iterator: Box::new(paths),
            _processor: PhantomData,
        })
    }

    /// Creates a loader over the path of every entry returned by
    /// `std::fs::read_dir(directory)`; entries are not filtered in any way.
    ///
    /// # Errors
    ///
    /// Returns an error when the directory cannot be read. Failures for
    /// individual entries are yielded as `Err` items by the loader.
    pub fn with_dir(
        directory: &str,
    ) -> Result<EpubFileLoader<'_, Result<PathBuf, EpubLoaderError>, P>, EpubLoaderError> {
        let entries = std::fs::read_dir(directory).map_err(FileLoaderError::IoError)?;
        let paths = entries.map(|entry| match entry {
            Ok(found) => Ok(found.path()),
            Err(err) => Err(EpubLoaderError::from(FileLoaderError::IoError(err))),
        });
        Ok(EpubFileLoader {
            iterator: Box::new(paths),
            _processor: PhantomData,
        })
    }
}
/// Owning iterator returned by `EpubFileLoader::into_iter`; yields the loader's
/// items by delegating to the boxed iterator it was built from.
pub struct IntoIter<'a, T> {
// The boxed iterator taken over from the loader.
iterator: Box<dyn Iterator<Item = T> + 'a>,
}
impl<'a, T, P> IntoIterator for EpubFileLoader<'a, T, P> {
type Item = T;
type IntoIter = IntoIter<'a, T>;
fn into_iter(self) -> Self::IntoIter {
IntoIter {
iterator: self.iterator,
}
}
}
/// Delegates iteration to the boxed iterator taken from the loader.
impl<T> Iterator for IntoIter<'_, T> {
    type Item = T;

    /// Returns the next item of the wrapped iterator.
    fn next(&mut self) -> Option<Self::Item> {
        self.iterator.next()
    }

    /// Forwards the wrapped iterator's bounds instead of the default `(0, None)`,
    /// so consumers such as `collect` can reserve capacity up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iterator.size_hint()
    }
}
/// Iterator over the text of an EPUB document, one entry at a time, with each
/// text passed through the processor type `P`.
struct EpubChapterIterator<P> {
// The document being walked; its internal cursor is advanced by `next`.
epub: EpubDoc<BufReader<File>>,
// Set once the document reports that it cannot advance any further.
finished: bool,
// Carries the processor type without storing a value of it.
_processor: PhantomData<P>,
}
impl<P> From<EpubDoc<BufReader<File>>> for EpubChapterIterator<P> {
fn from(epub: EpubDoc<BufReader<File>>) -> Self {
Self::new(epub)
}
}
impl<P> EpubChapterIterator<P> {
fn new(epub: EpubDoc<BufReader<File>>) -> Self {
Self {
epub,
finished: false,
_processor: PhantomData,
}
}
}
impl<P> Iterator for EpubChapterIterator<P>
where
    P: TextProcessor,
{
    type Item = Result<String, EpubLoaderError>;

    /// Yields the processed text of the next readable entry of the document.
    ///
    /// Entries for which `get_current_str` returns `None` are skipped. A
    /// processor failure is wrapped in `EpubLoaderError::TextProcessorError`.
    /// Once the document can no longer advance, every further call returns `None`.
    fn next(&mut self) -> Option<Self::Item> {
        while !self.finished {
            // Read the entry under the cursor before moving the cursor on.
            let current = self.epub.get_current_str();
            // A `false` return from `go_next` is treated as the end of the document;
            // the entry read above is still delivered on this call.
            self.finished = !self.epub.go_next();

            if let Some((text, _)) = current {
                let processed = P::process(&text);
                return Some(
                    processed.map_err(|cause| EpubLoaderError::TextProcessorError(Box::new(cause))),
                );
            }
        }
        None
    }
}
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use crate::loaders::epub::RawTextProcessor;

    use super::EpubFileLoader;

    // Glob shared by all tests; the assertions below expect it to match exactly
    // one loadable EPUB containing three chapters.
    const FIXTURES: &str = "tests/data/*.epub";

    // Chapters are returned as results when only file-level errors are ignored.
    #[test]
    fn test_epub_loader_with_errors() {
        let files: Vec<_> = EpubFileLoader::<_, RawTextProcessor>::with_glob(FIXTURES)
            .unwrap()
            .load_with_path()
            .ignore_errors()
            .by_chapter()
            .into_iter()
            .collect();

        assert_eq!(files.len(), 1);
        let (_, chapters) = &files[0];
        assert_eq!(chapters.len(), 3);
        assert!(chapters.iter().all(|(_, text)| text.is_ok()));
    }

    // Ignoring chapter-level errors as well still leaves all three chapters.
    #[test]
    fn test_epub_loader_with_ignoring_errors() {
        let files: Vec<_> = EpubFileLoader::<_, RawTextProcessor>::with_glob(FIXTURES)
            .unwrap()
            .load_with_path()
            .ignore_errors()
            .by_chapter()
            .ignore_errors()
            .into_iter()
            .collect();

        assert_eq!(files.len(), 1);
        let (_, chapters) = &files[0];
        assert_eq!(chapters.len(), 3);
    }

    // `read` produces one concatenated string per file.
    #[test]
    fn test_single_file() {
        let contents: Vec<_> = EpubFileLoader::<_, RawTextProcessor>::with_glob(FIXTURES)
            .unwrap()
            .read()
            .ignore_errors()
            .into_iter()
            .collect();

        assert_eq!(contents.len(), 1);
    }

    // `read_with_path` additionally reports which file the text came from.
    #[test]
    fn test_single_file_with_path() {
        let contents: Vec<_> = EpubFileLoader::<_, RawTextProcessor>::with_glob(FIXTURES)
            .unwrap()
            .read_with_path()
            .ignore_errors()
            .into_iter()
            .collect();

        assert_eq!(contents.len(), 1);
        let (path, _) = &contents[0];
        assert_eq!(path, &PathBuf::from("tests/data/dummy.epub"));
    }
}