mod markdown;
mod pdf;
mod plaintext;
mod rust;
mod treesitter;
mod typescript;
pub use markdown::*;
pub use pdf::*;
pub use plaintext::*;
pub use rust::*;
pub use treesitter::*;
pub use typescript::*;
use crate::config::ExtractConfig;
use crate::types::FileType;
use anyhow::Result;
use std::path::Path;
#[derive(Debug, Clone)]
pub struct ExtractedContent {
pub text: String,
pub title: Option<String>,
pub headings: Vec<String>,
pub links: Vec<String>,
pub success: bool,
}
pub fn extract(
path: &Path,
file_type: FileType,
config: &ExtractConfig,
) -> Result<ExtractedContent> {
match file_type {
FileType::Markdown => extract_markdown(path),
FileType::PlainText | FileType::Rst | FileType::Org => extract_plaintext(path),
FileType::Pdf => extract_pdf(path, config),
FileType::Rust => extract_rust(path),
FileType::JavaScript | FileType::TypeScript => extract_typescript(path, false),
FileType::Jsx | FileType::Tsx => extract_typescript(path, true),
FileType::Unknown => extract_plaintext(path),
}
}
#[cfg(test)]
mod tests {
use super::extract;
use crate::config::ExtractConfig;
use crate::types::FileType;
use anyhow::Result;
use std::fs;
#[test]
fn extracts_mjs_via_javascript_path() -> Result<()> {
let dir = tempfile::tempdir()?;
let path = dir.path().join("sample.mjs");
fs::write(
&path,
"import { helper } from './helper.mjs';\nexport function run() {}\n",
)?;
let content = extract(
&path,
FileType::from_extension("mjs"),
&ExtractConfig::default(),
)?;
assert!(content.success);
assert!(content
.headings
.contains(&"export function run".to_string()));
assert!(content.links.contains(&"./helper.mjs".to_string()));
Ok(())
}
#[test]
fn extracts_cjs_via_javascript_path() -> Result<()> {
let dir = tempfile::tempdir()?;
let path = dir.path().join("sample.cjs");
fs::write(&path, "class Worker {}\nmodule.exports = { Worker };\n")?;
let content = extract(
&path,
FileType::from_extension("cjs"),
&ExtractConfig::default(),
)?;
assert!(content.success);
assert!(content.headings.contains(&"class Worker".to_string()));
Ok(())
}
}