use std::path::PathBuf;
/// A unit of text in a corpus, together with optional metadata.
///
/// Only `content` is required. The derived `Default` yields an empty
/// `content` with every optional field set to `None`, and the derived
/// `PartialEq`/`Eq` compare all four fields.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Document {
    /// Optional identifier for the document.
    pub id: Option<String>,
    /// Optional human-readable title.
    pub title: Option<String>,
    /// The text of the document.
    pub content: String,
    /// Optional filesystem path associated with the document
    /// (presumably the file it was loaded from — confirm with callers).
    pub source: Option<PathBuf>,
}
impl Document {
pub fn new(content: String) -> Self {
Self {
id: None,
title: None,
content,
source: None,
}
}
pub fn with_title(title: String, content: String) -> Self {
Self {
id: None,
title: Some(title),
content,
source: None,
}
}
}
/// A source of corpus text that can be shared across threads.
///
/// Implementors must provide [`documents`](CorpusReader::documents) and
/// [`sentences`](CorpusReader::sentences); the size hints are optional and
/// report `None` unless overridden.
pub trait CorpusReader: Send + Sync {
    /// Returns a boxed, `Send` iterator yielding owned [`Document`]s.
    ///
    /// The iterator may borrow from `self`.
    fn documents(&self) -> Box<dyn Iterator<Item = Document> + Send + '_>;

    /// Returns a boxed, `Send` iterator yielding owned `String`s, one per
    /// sentence.
    ///
    /// The iterator may borrow from `self`.
    fn sentences(&self) -> Box<dyn Iterator<Item = String> + Send + '_>;

    /// Token-count estimate for the corpus, if the reader can supply one.
    ///
    /// The provided implementation returns `None`.
    fn estimated_tokens(&self) -> Option<usize> {
        None
    }

    /// Number of documents in the corpus, if the reader can supply it.
    ///
    /// The provided implementation returns `None`.
    fn document_count(&self) -> Option<usize> {
        None
    }
}
/// Forwards every [`CorpusReader`] method to the boxed reader.
///
/// The impl is generic over the boxed type (including unsized ones), so it
/// covers `Box<dyn CorpusReader>` as well as `Box<ConcreteReader>`. Either
/// can therefore be passed wherever an `R: CorpusReader` is expected.
impl<R: CorpusReader + ?Sized> CorpusReader for Box<R> {
    fn documents(&self) -> Box<dyn Iterator<Item = Document> + Send + '_> {
        // `**self` goes `&Box<R>` -> `Box<R>` -> `R`, so the call dispatches
        // to the inner reader rather than recursing into this impl.
        (**self).documents()
    }

    fn sentences(&self) -> Box<dyn Iterator<Item = String> + Send + '_> {
        (**self).sentences()
    }

    // The optional hints are forwarded explicitly; otherwise the trait's
    // provided `None` defaults would hide the inner reader's overrides.
    fn estimated_tokens(&self) -> Option<usize> {
        (**self).estimated_tokens()
    }

    fn document_count(&self) -> Option<usize> {
        (**self).document_count()
    }
}