sh_layer3/document_loaders/
mod.rs1pub mod csv;
6pub mod json;
7pub mod markdown;
8pub mod pdf;
9pub mod text;
10
11use crate::retriever_engine::Document;
12use crate::types::Layer3Result;
13use async_trait::async_trait;
14use std::path::PathBuf;
15
16pub use csv::CsvLoader;
18pub use json::JsonLoader;
19pub use markdown::MarkdownLoader;
20pub use pdf::PdfLoader;
21pub use text::TextLoader;
22
23#[async_trait]
27pub trait DocumentLoader: Send + Sync {
28 async fn load(&self, path: PathBuf) -> Layer3Result<Document>;
30
31 async fn load_and_split(&self, path: PathBuf) -> Layer3Result<Vec<Document>>;
33
34 fn supports(&self, path: &std::path::Path) -> bool;
36
37 fn extensions(&self) -> &[&str];
39}
40
41#[async_trait]
43pub trait BatchLoader: DocumentLoader {
44 async fn load_directory(&self, dir: PathBuf, recursive: bool) -> Layer3Result<Vec<Document>>;
46
47 async fn load_batch(&self, paths: Vec<PathBuf>) -> Layer3Result<Vec<(PathBuf, Document)>>;
49}
50
51pub struct LoaderRegistry {
53 loaders: Vec<Box<dyn DocumentLoader>>,
54}
55
56impl LoaderRegistry {
57 pub fn new() -> Self {
58 Self {
59 loaders: Vec::new(),
60 }
61 }
62
63 pub fn register(&mut self, loader: Box<dyn DocumentLoader>) {
64 self.loaders.push(loader);
65 }
66
67 pub fn get_loader(&self, path: &std::path::Path) -> Option<&dyn DocumentLoader> {
68 self.loaders
69 .iter()
70 .find(|l| l.supports(path))
71 .map(|l| l.as_ref())
72 }
73
74 pub fn load(&self, path: PathBuf) -> Layer3Result<Document> {
75 let loader = self
76 .get_loader(&path)
77 .ok_or_else(|| anyhow::anyhow!("No loader for: {:?}", path))?;
78
79 futures::executor::block_on(loader.load(path))
81 }
82}
83
84impl Default for LoaderRegistry {
85 fn default() -> Self {
86 Self::new()
87 }
88}
89
90#[derive(Debug, Clone, Default)]
92pub struct LoadOptions {
93 pub encoding: Option<String>,
95 pub max_size: Option<u64>,
97 pub extract_metadata: bool,
99 pub parse_options: serde_json::Map<String, serde_json::Value>,
101}
102
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed registry holds no loaders.
    #[test]
    fn test_loader_registry() {
        let fresh = LoaderRegistry::new();
        assert_eq!(fresh.loaders.len(), 0);
    }
}