pub mod options;
mod renderer;
mod traversal;
mod utils;
#[doc(hidden)]
pub mod alloc_counter;
use std::fs;
use std::path::{Path, PathBuf};
use thiserror::Error;
pub use options::{ConversionMode, ConversionOptions};
#[derive(Error, Debug)]
pub enum MdkaError {
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
}
/// Outcome of a successful single-file conversion.
///
/// `PartialEq` and `Eq` are derived so that results can be compared directly
/// (for example with `assert_eq!`); both fields are plain [`PathBuf`]s.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ConvertResult {
    /// Path of the HTML input, as supplied by the caller.
    pub src: PathBuf,
    /// Path of the Markdown file that was written.
    pub dest: PathBuf,
}
/// Converts an HTML string to Markdown using [`ConversionOptions::default`].
///
/// Shorthand for [`html_to_markdown_with`] with default options.
pub fn html_to_markdown(html: &str) -> String {
    let defaults = ConversionOptions::default();
    html_to_markdown_with(html, &defaults)
}
pub fn html_to_markdown_with(html: &str, opts: &ConversionOptions) -> String {
let document = scraper::Html::parse_document(html);
traversal::traverse(&document, opts)
}
/// Converts one HTML file to a Markdown file using default options.
///
/// Shorthand for [`html_file_to_markdown_with`] with
/// [`ConversionOptions::default`].
///
/// # Errors
///
/// Propagates any [`MdkaError`] returned by [`html_file_to_markdown_with`].
pub fn html_file_to_markdown(
    path: impl AsRef<Path>,
    out_dir: Option<impl AsRef<Path>>,
) -> Result<ConvertResult, MdkaError> {
    let defaults = ConversionOptions::default();
    html_file_to_markdown_with(path, out_dir, &defaults)
}
pub fn html_file_to_markdown_with(
path: impl AsRef<Path>,
out_dir: Option<impl AsRef<Path>>,
opts: &ConversionOptions,
) -> Result<ConvertResult, MdkaError> {
let path = path.as_ref();
let resolved_out_dir = match out_dir {
Some(d) => d.as_ref().to_path_buf(),
None => path
.parent()
.unwrap_or_else(|| Path::new("."))
.to_path_buf(),
};
let dest = do_convert_file(path, &resolved_out_dir, opts)?;
Ok(ConvertResult {
src: path.to_path_buf(),
dest,
})
}
/// Converts several HTML files into `out_dir` using default options.
///
/// Shorthand for [`html_files_to_markdown_with`] with
/// [`ConversionOptions::default`]. Only available with the `parallel` feature.
#[cfg(feature = "parallel")]
pub fn html_files_to_markdown<'a, P>(
    paths: &'a [P],
    out_dir: &Path,
) -> Vec<(&'a P, Result<PathBuf, MdkaError>)>
where
    P: AsRef<Path> + Sync,
{
    let defaults = ConversionOptions::default();
    html_files_to_markdown_with(paths, out_dir, &defaults)
}
/// Converts several HTML files into `out_dir` using the supplied options.
///
/// Each input is converted independently through rayon's parallel iterator.
/// A failure for one file does not stop the others: every input yields its own
/// `(path, result)` pair, where the result is the written Markdown path or the
/// error for that file. Only available with the `parallel` feature.
#[cfg(feature = "parallel")]
pub fn html_files_to_markdown_with<'a, P>(
    paths: &'a [P],
    out_dir: &Path,
    opts: &ConversionOptions,
) -> Vec<(&'a P, Result<PathBuf, MdkaError>)>
where
    P: AsRef<Path> + Sync,
{
    use rayon::prelude::*;

    paths
        .par_iter()
        .map(|src| {
            let outcome = do_convert_file(src.as_ref(), out_dir, opts);
            (src, outcome)
        })
        .collect()
}
/// Reads the HTML file at `src`, converts it, and writes `<stem>.md` into
/// `out_dir`, returning the path of the written file.
///
/// `out_dir` is created (including missing parents) before the input is read.
///
/// # Errors
///
/// Returns [`MdkaError::Io`] if creating the directory, reading `src`, or
/// writing the output fails.
fn do_convert_file(
    src: &Path,
    out_dir: &Path,
    opts: &ConversionOptions,
) -> Result<PathBuf, MdkaError> {
    fs::create_dir_all(out_dir)?;
    let html = fs::read_to_string(src)?;
    let md = html_to_markdown_with(&html, opts);

    // Append ".md" to the stem instead of calling `with_extension("md")` on the
    // joined path: `with_extension` replaces whatever follows the last dot, so
    // a stem such as "v1.2" (from "v1.2.html") would be truncated to "v1.md",
    // and distinct inputs like "v1.2.html" / "v1.3.html" would overwrite each
    // other.
    let mut file_name = src.file_stem().unwrap_or_default().to_os_string();
    file_name.push(".md");
    let dest = out_dir.join(file_name);

    fs::write(&dest, md)?;
    Ok(dest)
}