leann_core/document_loaders/
mod.rs1#[cfg(feature = "pdf")]
7pub mod pdf;
8
9use anyhow::Result;
10use std::path::Path;
11
12pub fn extract_text(path: &Path) -> Result<Option<String>> {
19 let ext = path
20 .extension()
21 .map(|e| e.to_string_lossy().to_lowercase())
22 .unwrap_or_default();
23
24 match ext.as_str() {
25 #[cfg(feature = "pdf")]
26 "pdf" => match pdf::extract_pdf_text(path) {
27 Ok(text) if !text.trim().is_empty() => Ok(Some(text)),
28 Ok(_) => {
29 tracing::warn!("PDF has no extractable text: {}", path.display());
30 Ok(None)
31 }
32 Err(e) => {
33 tracing::warn!("Failed to extract text from PDF {}: {}", path.display(), e);
34 Ok(None)
35 }
36 },
37 #[cfg(not(feature = "pdf"))]
38 "pdf" => {
39 tracing::warn!(
40 "PDF support not enabled. Rebuild with `pdf` feature to load: {}",
41 path.display()
42 );
43 Ok(None)
44 }
45 _ => {
46 match std::fs::read_to_string(path) {
48 Ok(content) if !content.trim().is_empty() => Ok(Some(content)),
49 Ok(_) => Ok(None),
50 Err(e) => {
51 tracing::debug!("Could not read {} as text: {}", path.display(), e);
52 Ok(None)
53 }
54 }
55 }
56 }
57}
58
/// Reports whether `path` names a document format that is not plain text.
///
/// Only the file extension is inspected (the file itself is never opened), and
/// the comparison ignores ASCII case, so `report.pdf` and `REPORT.PDF` are
/// treated alike. Currently the only recognised extension is `pdf`.
///
/// Returns `false` for paths without an extension.
pub fn is_binary_document(path: &Path) -> bool {
    // Compare on the `OsStr` directly: no lossy conversion and no temporary
    // lowercase `String` are needed to answer a yes/no question.
    matches!(path.extension(), Some(ext) if ext.eq_ignore_ascii_case("pdf"))
}