use crate::common::{Error, Result};
#[cfg(feature = "ole")]
use crate::ole;
#[cfg(feature = "ooxml")]
use crate::ooxml;
use std::fs::File;
use std::io::{Cursor, Read, Seek};
use std::path::Path;
/// Format-specific backing document held by [`Document`].
///
/// Each variant pairs the parsed document with the metadata that was
/// extracted when the file was opened.
// The `Doc` variant stores its document inline while `Docx` is boxed, so the
// variants can differ a lot in size; the lint is silenced instead of boxing
// the `Doc` payload.
#[allow(clippy::large_enum_variant)]
enum DocumentImpl {
/// Legacy `.doc` document together with its metadata.
#[cfg(feature = "ole")]
Doc(ole::doc::Document, crate::common::Metadata),
/// `.docx` document together with its metadata.
///
/// The `'static` lifetime is not real: the value is created by transmuting
/// a document that borrows from the package kept in `Document::_package`,
/// so it must never outlive that package.
#[cfg(feature = "ooxml")]
Docx(Box<ooxml::docx::Document<'static>>, crate::common::Metadata),
}
/// Format-agnostic handle to a Word document.
///
/// Wraps either a legacy `.doc` document or a `.docx` document behind one
/// API; which one is chosen at open time from the file signature.
///
/// Field order matters: Rust drops struct fields in declaration order, and
/// `inner` may hold a `.docx` document that borrows from the package stored
/// in `_package`. `inner` must therefore stay declared before `_package` so
/// the borrower is dropped before the data it points into.
pub struct Document {
// Format-specific document plus the metadata captured at open time.
inner: DocumentImpl,
// Owner of the OOXML package that a `Docx` document in `inner` borrows from.
// `None` when the document is a legacy `.doc`. Never read directly; it is
// kept only so the package stays alive.
#[cfg(feature = "ooxml")]
_package: Option<Box<ooxml::docx::Package>>,
}
impl Document {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
let path = path.as_ref();
let mut file = File::open(path)?;
let format = detect_document_format(&mut file)?;
match format {
#[cfg(feature = "ole")]
DocumentFormat::Doc => {
let mut package = ole::doc::Package::open(path)
.map_err(Error::from)?;
let doc = package.document()
.map_err(Error::from)?;
let metadata = package.ole_file().get_metadata()
.map(|m| m.into())
.unwrap_or_default();
Ok(Self {
inner: DocumentImpl::Doc(doc, metadata),
#[cfg(feature = "ooxml")]
_package: None,
})
}
#[cfg(not(feature = "ole"))]
DocumentFormat::Doc => {
Err(Error::FeatureDisabled("ole".to_string()))
}
#[cfg(feature = "ooxml")]
DocumentFormat::Docx => {
let package = Box::new(ooxml::docx::Package::open(path)
.map_err(Error::from)?);
let doc_ref = unsafe {
let pkg_ptr = &*package as *const ooxml::docx::Package;
let doc = (*pkg_ptr).document()
.map_err(Error::from)?;
std::mem::transmute::<ooxml::docx::Document<'_>, ooxml::docx::Document<'static>>(doc)
};
let metadata = crate::ooxml::metadata::extract_metadata(package.opc_package())
.unwrap_or_else(|_| crate::common::Metadata::default());
Ok(Self {
inner: DocumentImpl::Docx(Box::new(doc_ref), metadata),
_package: Some(package),
})
}
#[cfg(not(feature = "ooxml"))]
DocumentFormat::Docx => {
Err(Error::FeatureDisabled("ooxml".to_string()))
}
}
}
pub fn from_bytes(bytes: Vec<u8>) -> Result<Self> {
let format = detect_document_format_from_bytes(&bytes)?;
match format {
#[cfg(feature = "ole")]
DocumentFormat::Doc => {
let cursor = Cursor::new(bytes);
let mut package = ole::doc::Package::from_reader(cursor)
.map_err(Error::from)?;
let doc = package.document()
.map_err(Error::from)?;
let metadata = package.ole_file().get_metadata()
.map(|m| m.into())
.unwrap_or_default();
Ok(Self {
inner: DocumentImpl::Doc(doc, metadata),
#[cfg(feature = "ooxml")]
_package: None,
})
}
#[cfg(not(feature = "ole"))]
DocumentFormat::Doc => {
Err(Error::FeatureDisabled("ole".to_string()))
}
#[cfg(feature = "ooxml")]
DocumentFormat::Docx => {
let cursor = Cursor::new(bytes);
let package = Box::new(ooxml::docx::Package::from_reader(cursor)
.map_err(Error::from)?);
let doc_ref = unsafe {
let pkg_ptr = &*package as *const ooxml::docx::Package;
let doc = (*pkg_ptr).document()
.map_err(Error::from)?;
std::mem::transmute::<ooxml::docx::Document<'_>, ooxml::docx::Document<'static>>(doc)
};
let metadata = crate::ooxml::metadata::extract_metadata(package.opc_package())
.unwrap_or_else(|_| crate::common::Metadata::default());
Ok(Self {
inner: DocumentImpl::Docx(Box::new(doc_ref), metadata),
_package: Some(package),
})
}
#[cfg(not(feature = "ooxml"))]
DocumentFormat::Docx => {
Err(Error::FeatureDisabled("ooxml".to_string()))
}
}
}
pub fn text(&self) -> Result<String> {
match &self.inner {
#[cfg(feature = "ole")]
DocumentImpl::Doc(doc, _) => {
doc.text().map_err(Error::from)
}
#[cfg(feature = "ooxml")]
DocumentImpl::Docx(doc, _) => {
doc.text().map_err(Error::from)
}
}
}
pub fn paragraph_count(&self) -> Result<usize> {
match &self.inner {
#[cfg(feature = "ole")]
DocumentImpl::Doc(doc, _) => {
doc.paragraph_count().map_err(Error::from)
}
#[cfg(feature = "ooxml")]
DocumentImpl::Docx(doc, _) => {
doc.paragraph_count().map_err(Error::from)
}
}
}
pub fn paragraphs(&self) -> Result<Vec<Paragraph>> {
match &self.inner {
#[cfg(feature = "ole")]
DocumentImpl::Doc(doc, _) => {
let paras = doc.paragraphs()
.map_err(Error::from)?;
Ok(paras.into_iter().map(Paragraph::Doc).collect())
}
#[cfg(feature = "ooxml")]
DocumentImpl::Docx(doc, _) => {
let paras = doc.paragraphs()
.map_err(Error::from)?;
Ok(paras.into_iter().map(Paragraph::Docx).collect())
}
}
}
pub fn tables(&self) -> Result<Vec<Table>> {
match &self.inner {
#[cfg(feature = "ole")]
DocumentImpl::Doc(doc, _) => {
let tables = doc.tables()
.map_err(Error::from)?;
Ok(tables.into_iter().map(Table::Doc).collect())
}
#[cfg(feature = "ooxml")]
DocumentImpl::Docx(doc, _) => {
let tables = doc.tables()
.map_err(Error::from)?;
Ok(tables.into_iter().map(Table::Docx).collect())
}
}
}
pub fn metadata(&self) -> Result<crate::common::Metadata> {
match &self.inner {
#[cfg(feature = "ole")]
DocumentImpl::Doc(_, metadata) => {
Ok(metadata.clone())
}
#[cfg(feature = "ooxml")]
DocumentImpl::Docx(_, metadata) => {
Ok(metadata.clone())
}
}
}
}
/// Format-agnostic paragraph, wrapping the paragraph type of whichever
/// backend produced it.
pub enum Paragraph {
/// Paragraph from a legacy `.doc` document.
#[cfg(feature = "ole")]
Doc(ole::doc::Paragraph),
/// Paragraph from a `.docx` document.
#[cfg(feature = "ooxml")]
Docx(ooxml::docx::Paragraph),
}
impl Paragraph {
    /// Returns the paragraph text as an owned `String`.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying paragraph.
    pub fn text(&self) -> Result<String> {
        match self {
            #[cfg(feature = "ole")]
            Self::Doc(para) => {
                let content = para.text().map_err(Error::from)?;
                Ok(content.to_string())
            }
            #[cfg(feature = "ooxml")]
            Self::Docx(para) => {
                let content = para.text().map_err(Error::from)?;
                Ok(content.to_string())
            }
        }
    }

    /// Returns the paragraph's runs, each wrapped in the format-agnostic
    /// [`Run`] type.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying paragraph.
    pub fn runs(&self) -> Result<Vec<Run>> {
        match self {
            #[cfg(feature = "ole")]
            Self::Doc(para) => para
                .runs()
                .map(|found| found.into_iter().map(Run::Doc).collect())
                .map_err(Error::from),
            #[cfg(feature = "ooxml")]
            Self::Docx(para) => para
                .runs()
                .map(|found| found.into_iter().map(Run::Docx).collect())
                .map_err(Error::from),
        }
    }
}
/// Format-agnostic text run, wrapping the run type of whichever backend
/// produced it.
pub enum Run {
/// Run from a legacy `.doc` document.
#[cfg(feature = "ole")]
Doc(ole::doc::Run),
/// Run from a `.docx` document.
#[cfg(feature = "ooxml")]
Docx(ooxml::docx::Run),
}
impl Run {
    /// Returns the run text as an owned `String`.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying run.
    pub fn text(&self) -> Result<String> {
        match self {
            #[cfg(feature = "ole")]
            Self::Doc(run) => {
                let content = run.text().map_err(Error::from)?;
                Ok(content.to_string())
            }
            #[cfg(feature = "ooxml")]
            Self::Docx(run) => {
                let content = run.text().map_err(Error::from)?;
                Ok(content.to_string())
            }
        }
    }

    /// Returns the bold setting reported by the underlying run.
    ///
    /// # Errors
    ///
    /// The `.doc` arm always returns `Ok`; the `.docx` arm propagates any
    /// error from the underlying run.
    pub fn bold(&self) -> Result<Option<bool>> {
        match self {
            #[cfg(feature = "ole")]
            Self::Doc(run) => {
                let flag = run.bold();
                Ok(flag)
            }
            #[cfg(feature = "ooxml")]
            Self::Docx(run) => {
                let flag = run.bold().map_err(Error::from)?;
                Ok(flag)
            }
        }
    }

    /// Returns the italic setting reported by the underlying run.
    ///
    /// # Errors
    ///
    /// The `.doc` arm always returns `Ok`; the `.docx` arm propagates any
    /// error from the underlying run.
    pub fn italic(&self) -> Result<Option<bool>> {
        match self {
            #[cfg(feature = "ole")]
            Self::Doc(run) => {
                let flag = run.italic();
                Ok(flag)
            }
            #[cfg(feature = "ooxml")]
            Self::Docx(run) => {
                let flag = run.italic().map_err(Error::from)?;
                Ok(flag)
            }
        }
    }

    /// Returns the strikethrough setting reported by the underlying run.
    ///
    /// # Errors
    ///
    /// The `.doc` arm always returns `Ok`; the `.docx` arm propagates any
    /// error from the underlying run.
    pub fn strikethrough(&self) -> Result<Option<bool>> {
        match self {
            #[cfg(feature = "ole")]
            Self::Doc(run) => {
                let flag = run.strikethrough();
                Ok(flag)
            }
            #[cfg(feature = "ooxml")]
            Self::Docx(run) => {
                let flag = run.strikethrough().map_err(Error::from)?;
                Ok(flag)
            }
        }
    }

    /// Returns the run's vertical position, or `None` when it is normal or
    /// unspecified.
    ///
    /// Only available with the `ole` feature, because the return type is the
    /// OLE `VerticalPosition`; `.docx` values are mapped onto it.
    ///
    /// # Errors
    ///
    /// The `.doc` arm always returns `Ok`; the `.docx` arm propagates any
    /// error from the underlying run.
    #[cfg(feature = "ole")]
    pub fn vertical_position(&self) -> Result<Option<crate::ole::doc::parts::chp::VerticalPosition>> {
        match self {
            #[cfg(feature = "ole")]
            Self::Doc(run) => {
                use crate::ole::doc::parts::chp::VerticalPosition;
                let position = run.properties().vertical_position;
                // `Normal` carries no information, so report it as absent.
                if matches!(position, VerticalPosition::Normal) {
                    Ok(None)
                } else {
                    Ok(Some(position))
                }
            }
            #[cfg(feature = "ooxml")]
            Self::Docx(run) => {
                use crate::ole::doc::parts::chp::VerticalPosition as OleVerticalPosition;
                use crate::ooxml::docx::paragraph::VerticalPosition as OoxmlVerticalPosition;
                let position = run.vertical_position().map_err(Error::from)?;
                let mapped = match position {
                    Some(OoxmlVerticalPosition::Superscript) => {
                        Some(OleVerticalPosition::Superscript)
                    }
                    Some(OoxmlVerticalPosition::Subscript) => Some(OleVerticalPosition::Subscript),
                    Some(OoxmlVerticalPosition::Normal) | None => None,
                };
                Ok(mapped)
            }
        }
    }
}
/// Format-agnostic table, wrapping the table type of whichever backend
/// produced it.
pub enum Table {
/// Table from a legacy `.doc` document.
#[cfg(feature = "ole")]
Doc(ole::doc::Table),
/// Table from a `.docx` document.
#[cfg(feature = "ooxml")]
Docx(ooxml::docx::Table),
}
impl Table {
    /// Returns the row count reported by the underlying table.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying table.
    pub fn row_count(&self) -> Result<usize> {
        match self {
            #[cfg(feature = "ole")]
            Self::Doc(table) => {
                let count = table.row_count().map_err(Error::from)?;
                Ok(count)
            }
            #[cfg(feature = "ooxml")]
            Self::Docx(table) => {
                let count = table.row_count().map_err(Error::from)?;
                Ok(count)
            }
        }
    }

    /// Returns the table's rows, each wrapped in the format-agnostic
    /// [`Row`] type.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying table.
    pub fn rows(&self) -> Result<Vec<Row>> {
        match self {
            #[cfg(feature = "ole")]
            Self::Doc(table) => table
                .rows()
                .map(|found| found.into_iter().map(Row::Doc).collect())
                .map_err(Error::from),
            #[cfg(feature = "ooxml")]
            Self::Docx(table) => table
                .rows()
                .map(|found| found.into_iter().map(Row::Docx).collect())
                .map_err(Error::from),
        }
    }
}
/// Format-agnostic table row, wrapping the row type of whichever backend
/// produced it.
pub enum Row {
/// Row from a legacy `.doc` table.
#[cfg(feature = "ole")]
Doc(ole::doc::Row),
/// Row from a `.docx` table.
#[cfg(feature = "ooxml")]
Docx(ooxml::docx::Row),
}
impl Row {
    /// Returns the row's cells, each wrapped in the format-agnostic
    /// [`Cell`] type.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying row.
    pub fn cells(&self) -> Result<Vec<Cell>> {
        match self {
            #[cfg(feature = "ole")]
            Self::Doc(row) => row
                .cells()
                .map(|found| found.into_iter().map(Cell::Doc).collect())
                .map_err(Error::from),
            #[cfg(feature = "ooxml")]
            Self::Docx(row) => row
                .cells()
                .map(|found| found.into_iter().map(Cell::Docx).collect())
                .map_err(Error::from),
        }
    }
}
/// Format-agnostic table cell, wrapping the cell type of whichever backend
/// produced it.
pub enum Cell {
/// Cell from a legacy `.doc` table.
#[cfg(feature = "ole")]
Doc(ole::doc::Cell),
/// Cell from a `.docx` table.
#[cfg(feature = "ooxml")]
Docx(ooxml::docx::Cell),
}
impl Cell {
    /// Returns the cell text as an owned `String`.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying cell.
    pub fn text(&self) -> Result<String> {
        match self {
            #[cfg(feature = "ole")]
            Self::Doc(cell) => {
                let content = cell.text().map_err(Error::from)?;
                Ok(content.to_string())
            }
            #[cfg(feature = "ooxml")]
            Self::Docx(cell) => {
                let content = cell.text().map_err(Error::from)?;
                Ok(content.to_string())
            }
        }
    }
}
/// Document format as determined from the leading bytes of the input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DocumentFormat {
/// Input starts with `D0 CF 11 E0`; handled by the `ole` backend.
Doc,
/// Input starts with `50 4B 03 04` (`PK\x03\x04`); handled by the `ooxml`
/// backend.
Docx,
}
fn detect_document_format<R: Read + Seek>(reader: &mut R) -> Result<DocumentFormat> {
use std::io::SeekFrom;
let mut header = [0u8; 8];
reader.read_exact(&mut header)?;
reader.seek(SeekFrom::Start(0))?;
detect_document_format_from_signature(&header)
}
#[inline]
fn detect_document_format_from_bytes(bytes: &[u8]) -> Result<DocumentFormat> {
if bytes.len() < 4 {
return Err(Error::InvalidFormat("File too small to determine format".to_string()));
}
detect_document_format_from_signature(&bytes[0..8.min(bytes.len())])
}
#[inline]
fn detect_document_format_from_signature(header: &[u8]) -> Result<DocumentFormat> {
if header.len() >= 4 && header[0..4] == [0xD0, 0xCF, 0x11, 0xE0] {
return Ok(DocumentFormat::Doc);
}
if header.len() >= 4 && header[0..4] == [0x50, 0x4B, 0x03, 0x04] {
return Ok(DocumentFormat::Docx);
}
Err(Error::NotOfficeFile)
}