extractous 0.3.0

Extractous provides a fast and efficient way to extract content from all kinds of file formats, including PDF, Word, Excel, CSV, Email, etc. Internally, it uses a natively compiled Apache Tika for formats that are not supported natively by the Rust core.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
use extractous::Extractor;

/// Extracts the content of the file named by the first command-line argument
/// and prints it to standard output.
///
/// Exits with status 2 and a usage message on stderr when no file path is
/// supplied, and with status 1 when extraction fails, instead of panicking.
fn main() {
    // Get the command-line arguments; index 0 is the program name.
    let args: Vec<String> = std::env::args().collect();

    // Checked access: a missing argument is a user error, not a bug, so report
    // it with a usage hint rather than an index-out-of-bounds panic.
    let file_path = match args.get(1) {
        Some(path) => path,
        None => {
            let program = args.first().map_or("extractor", String::as_str);
            eprintln!("usage: {} <file>", program);
            std::process::exit(2);
        }
    };

    // Extract the provided file content to a string, with XML output enabled.
    let extractor = Extractor::new().set_xml_output(true);
    match extractor.extract_file_to_string(file_path) {
        // The returned metadata is not used by this example.
        Ok((content, _metadata)) => println!("{}", content),
        Err(err) => {
            // Debug formatting is used because it is the only formatting trait
            // the original `.unwrap()` call required of the error type.
            eprintln!("failed to extract '{}': {:?}", file_path, err);
            std::process::exit(1);
        }
    }
}