1mod error;
19pub mod models;
20mod parsers;
21pub mod vfs;
22
23use std::fs::File;
24use std::io::{Cursor, Read, Seek, Write};
25use std::path::Path;
26
27pub use error::{OxdocError, Result};
28pub use models::{
29 DocumentInfo, DocumentType, Extraction, OutputWarning, XlsxCsvOptions, XlsxSheet,
30};
31#[doc(hidden)]
32pub use parsers::docx::fuzz_extract_text as fuzz_docx_text;
33#[doc(hidden)]
34pub use parsers::fuzz_parse_relationships as fuzz_relationships;
35#[doc(hidden)]
36pub use parsers::metadata::fuzz_parse_metadata as fuzz_metadata;
37#[doc(hidden)]
38pub use parsers::pptx::fuzz_extract_text as fuzz_pptx_text;
39#[doc(hidden)]
40pub use parsers::xlsx::{fuzz_parse_shared_strings, fuzz_parse_sheet};
41use parsers::{docx, metadata, pptx, xlsx};
42use vfs::OoxmlPackage;
43
44pub fn extract_docx_text(path: impl AsRef<Path>) -> Result<Extraction<String>> {
45 let file = File::open(path)?;
46 extract_docx_text_from_reader(file)
47}
48
49pub fn extract_docx_text_from_reader<R: Read + Seek>(reader: R) -> Result<Extraction<String>> {
50 let mut package = OoxmlPackage::new(reader)?;
51 docx::extract_text(&mut package)
52}
53
54pub fn extract_pptx_text(path: impl AsRef<Path>) -> Result<Extraction<String>> {
55 let file = File::open(path)?;
56 extract_pptx_text_from_reader(file)
57}
58
59pub fn extract_pptx_text_from_reader<R: Read + Seek>(reader: R) -> Result<Extraction<String>> {
60 let mut package = OoxmlPackage::new(reader)?;
61 pptx::extract_text(&mut package)
62}
63
64pub fn extract_xlsx_csv<W: Write>(
65 path: impl AsRef<Path>,
66 options: XlsxCsvOptions<'_>,
67 writer: W,
68) -> Result<Extraction<()>> {
69 let file = File::open(path)?;
70 extract_xlsx_csv_from_reader(file, options, writer)
71}
72
73pub fn extract_xlsx_csv_from_reader<R: Read + Seek, W: Write>(
74 reader: R,
75 options: XlsxCsvOptions<'_>,
76 writer: W,
77) -> Result<Extraction<()>> {
78 let mut package = OoxmlPackage::new(reader)?;
79 xlsx::write_csv(&mut package, options, writer)
80}
81
82pub fn list_xlsx_sheets(path: impl AsRef<Path>) -> Result<Extraction<Vec<XlsxSheet>>> {
83 let file = File::open(path)?;
84 list_xlsx_sheets_from_reader(file)
85}
86
87pub fn list_xlsx_sheets_from_reader<R: Read + Seek>(
88 reader: R,
89) -> Result<Extraction<Vec<XlsxSheet>>> {
90 let mut package = OoxmlPackage::new(reader)?;
91 xlsx::list_sheets(&mut package)
92}
93
94pub fn detect_document_type(path: impl AsRef<Path>) -> Result<DocumentType> {
95 let file = File::open(path)?;
96 detect_document_type_from_reader(file)
97}
98
99pub fn detect_document_type_from_reader<R: Read>(mut reader: R) -> Result<DocumentType> {
100 let mut bytes = Vec::new();
101 reader.read_to_end(&mut bytes)?;
102 let mut package = OoxmlPackage::new(Cursor::new(bytes))?;
103 parsers::detect_document_type(&mut package)
104}
105
106pub fn read_info(path: impl AsRef<Path>) -> Result<Extraction<DocumentInfo>> {
107 let path = path.as_ref();
108 let file_name = path
109 .file_name()
110 .and_then(|name| name.to_str())
111 .unwrap_or_default()
112 .to_owned();
113
114 let file = File::open(path)?;
115 read_info_from_reader(file, file_name)
116}
117
118pub fn read_info_from_reader<R: Read + Seek>(
119 reader: R,
120 file_name: impl Into<String>,
121) -> Result<Extraction<DocumentInfo>> {
122 let mut package = OoxmlPackage::new(reader)?;
123 metadata::read_info(&mut package, file_name.into())
124}