use crate::document_loaders::{DocumentLoader, LoadOptions};
use crate::retriever_engine::Document;
use crate::types::Layer3Result;
use async_trait::async_trait;
use std::path::PathBuf;
#[allow(dead_code)]
pub struct TextLoader {
extensions: Vec<&'static str>,
#[allow(dead_code)]
options: LoadOptions,
}
impl TextLoader {
pub fn new() -> Self {
Self {
extensions: vec!["txt", "text", "log", "md"],
options: LoadOptions::default(),
}
}
pub fn with_options(options: LoadOptions) -> Self {
Self {
extensions: vec!["txt", "text", "log", "md"],
options,
}
}
}
impl Default for TextLoader {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl DocumentLoader for TextLoader {
async fn load(&self, path: PathBuf) -> Layer3Result<Document> {
let content = tokio::fs::read_to_string(&path).await?;
Ok(Document::new(content).with_source(path.to_string_lossy().to_string()))
}
async fn load_and_split(&self, path: PathBuf) -> Layer3Result<Vec<Document>> {
let content = tokio::fs::read_to_string(&path).await?;
let paragraphs: Vec<&str> = content.split("\n\n").collect();
Ok(paragraphs
.into_iter()
.filter(|p| !p.trim().is_empty())
.enumerate()
.map(|(i, p)| {
Document::new(p.to_string()).with_source(format!(
"{}#{}",
path.to_string_lossy(),
i
))
})
.collect())
}
fn supports(&self, path: &std::path::Path) -> bool {
path.extension()
.and_then(|e| e.to_str())
.map(|e| self.extensions.contains(&e))
.unwrap_or(false)
}
fn extensions(&self) -> &[&str] {
&self.extensions
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_text_loader_extensions() {
let loader = TextLoader::new();
assert!(loader.extensions().contains(&"txt"));
assert!(loader.extensions().contains(&"md"));
}
}