use crate::error::Result;
use crate::model::Document;
use crate::render::RenderOptions;
use crate::{FormatType, ParseOptions};
use std::path::Path;
use tokio::fs;
use tokio::io::{AsyncRead, AsyncReadExt};
pub async fn parse_file(path: impl AsRef<Path>) -> Result<Document> {
let data = fs::read(path).await?;
parse_bytes(&data).await
}
pub async fn parse_bytes(data: &[u8]) -> Result<Document> {
let data = data.to_vec();
tokio::task::spawn_blocking(move || crate::parse_bytes(&data))
.await
.map_err(|e| crate::error::Error::Io(std::io::Error::other(e.to_string())))?
}
pub async fn parse_reader<R: AsyncRead + Unpin>(mut reader: R) -> Result<Document> {
let mut data = Vec::new();
reader.read_to_end(&mut data).await?;
parse_bytes(&data).await
}
/// Parses the file at `path` and returns the document's plain text.
///
/// # Errors
///
/// Propagates any error produced by [`parse_file`].
pub async fn extract_text(path: impl AsRef<Path>) -> Result<String> {
    parse_file(path).await.map(|parsed| parsed.plain_text())
}
pub async fn to_markdown(path: impl AsRef<Path>) -> Result<String> {
let document = parse_file(path).await?;
crate::render::render_markdown(&document, &RenderOptions::default())
}
pub async fn to_markdown_with_options(
path: impl AsRef<Path>,
options: &RenderOptions,
) -> Result<String> {
let document = parse_file(path).await?;
let options = options.clone();
tokio::task::spawn_blocking(move || crate::render::render_markdown(&document, &options))
.await
.map_err(|e| crate::error::Error::Io(std::io::Error::other(e.to_string())))?
}
/// Reads the file at `path` and detects its format from the file's bytes.
///
/// The entire file is read into memory and handed to
/// `crate::detect_format_from_bytes`.
///
/// # Errors
///
/// Returns an error if the file cannot be read or if detection fails.
pub async fn detect_format(path: impl AsRef<Path>) -> Result<FormatType> {
    fs::read(path)
        .await
        .map_err(Into::into)
        .and_then(|contents| crate::detect_format_from_bytes(&contents))
}
/// Builder that collects rendering and parsing settings before a file is
/// parsed asynchronously.
pub struct AsyncUnhwp {
/// Options later handed to the Markdown renderer (image directory, frontmatter flag).
render_options: RenderOptions,
/// Parse options; adjusted by the `lenient` builder method.
parse_options: ParseOptions,
/// Whether rendering the parsed document should also write its resources to the image directory.
extract_images: bool,
}
impl Default for AsyncUnhwp {
fn default() -> Self {
Self::new()
}
}
impl AsyncUnhwp {
    /// Creates a builder with default render and parse options and image
    /// extraction turned off.
    pub fn new() -> Self {
        let render_options = RenderOptions::default();
        let parse_options = ParseOptions::default();
        Self {
            render_options,
            parse_options,
            extract_images: false,
        }
    }

    /// Sets whether resources are written to the image directory when the
    /// parsed document is rendered to Markdown.
    pub fn with_images(self, extract: bool) -> Self {
        Self {
            extract_images: extract,
            ..self
        }
    }

    /// Sets the directory stored in the render options as `image_dir`.
    pub fn with_image_dir(mut self, dir: impl Into<std::path::PathBuf>) -> Self {
        let target_dir = dir.into();
        self.render_options.image_dir = Some(target_dir);
        self
    }

    /// Turns on the `include_frontmatter` render option.
    pub fn with_frontmatter(self) -> Self {
        let mut builder = self;
        builder.render_options.include_frontmatter = true;
        builder
    }

    /// Replaces the stored parse options with their `lenient()` variant.
    pub fn lenient(mut self) -> Self {
        let relaxed = self.parse_options.lenient();
        self.parse_options = relaxed;
        self
    }

    /// Parses the file at `path` and bundles the resulting document with this
    /// builder's render settings.
    ///
    /// NOTE(review): `parse_options` is not consulted here; parsing goes
    /// through `parse_file`, which takes no options, so `lenient()` currently
    /// has no visible effect on the parse. Confirm whether that is intended.
    ///
    /// # Errors
    ///
    /// Propagates any error produced by `parse_file`.
    pub async fn parse(self, path: impl AsRef<Path>) -> Result<AsyncParsedDocument> {
        let Self {
            render_options,
            extract_images,
            ..
        } = self;
        parse_file(path).await.map(|document| AsyncParsedDocument {
            document,
            render_options,
            extract_images,
        })
    }
}
/// A parsed document together with the render settings chosen on the builder.
pub struct AsyncParsedDocument {
/// The parsed document.
document: Document,
/// Options passed to the Markdown renderer; `image_dir` is also the target for extracted resources.
render_options: RenderOptions,
/// When true, `to_markdown` writes the document's resources into `image_dir` (if one is set).
extract_images: bool,
}
impl AsyncParsedDocument {
pub fn document(&self) -> &Document {
&self.document
}
pub async fn to_markdown(&self) -> Result<String> {
if self.extract_images {
if let Some(ref image_dir) = self.render_options.image_dir {
fs::create_dir_all(image_dir).await?;
for (name, resource) in &self.document.resources {
let path = image_dir.join(name);
fs::write(path, &resource.data).await?;
}
}
}
let document = self.document.clone();
let options = self.render_options.clone();
tokio::task::spawn_blocking(move || crate::render::render_markdown(&document, &options))
.await
.map_err(|e| crate::error::Error::Io(std::io::Error::other(e.to_string())))?
}
pub fn to_text(&self) -> String {
self.document.plain_text()
}
pub fn into_document(self) -> Document {
self.document
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A buffer beginning with the bytes `50 4B 03 04` (`PK\x03\x04`) must be
    /// detected as `FormatType::Hwpx`.
    #[tokio::test]
    async fn test_detect_format_bytes() {
        let pk_prefixed = [0x50, 0x4B, 0x03, 0x04, 0x00, 0x00, 0x00, 0x00];
        let detected = crate::detect_format_from_bytes(&pk_prefixed).unwrap();
        assert_eq!(detected, FormatType::Hwpx);
    }
}