pub mod animation;
pub mod batch;
pub mod browser;
pub mod extract_images;
pub mod figures;
pub mod gdocs;
pub mod html;
pub mod latex;
pub mod localize_images;
pub mod markdown;
pub mod metadata;
pub mod postprocess;
pub mod themed_image;
pub mod verify;
use thiserror::Error;
/// Version string of this crate, read at compile time from the
/// `CARGO_PKG_VERSION` environment variable.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
/// Error type shared by the public API of this crate.
///
/// `Display` is generated by `thiserror` from the `#[error(...)]` attribute on
/// each variant. The `String` payloads carry a free-form description that is
/// appended to the fixed message prefix.
#[derive(Error, Debug)]
pub enum WebCaptureError {
/// Reported as "Failed to fetch URL: ...".
#[error("Failed to fetch URL: {0}")]
FetchError(String),
/// Reported as "Failed to parse HTML: ...".
#[error("Failed to parse HTML: {0}")]
ParseError(String),
/// Reported as "Failed to convert to Markdown: ...".
#[error("Failed to convert to Markdown: {0}")]
MarkdownError(String),
/// Reported as "Failed to capture screenshot: ...".
#[error("Failed to capture screenshot: {0}")]
ScreenshotError(String),
/// Reported as "Browser error: ...".
#[error("Browser error: {0}")]
BrowserError(String),
/// Reported as "Invalid URL: ...".
#[error("Invalid URL: {0}")]
InvalidUrl(String),
/// Wraps a [`std::io::Error`]; `#[from]` generates the `From` impl, so `?`
/// converts I/O errors into this variant automatically.
#[error("IO error: {0}")]
IoError(#[from] std::io::Error),
/// Wraps a `reqwest::Error`; `#[from]` generates the `From` impl, so `?`
/// converts request errors into this variant automatically.
#[error("Request error: {0}")]
RequestError(#[from] reqwest::Error),
}
/// Convenience alias for results whose error type is [`WebCaptureError`].
pub type Result<T> = std::result::Result<T, WebCaptureError>;
/// Crate-root entry point that forwards `url` to [`html::fetch_html`] and
/// returns the string it produces.
///
/// # Errors
///
/// Returns whatever [`WebCaptureError`] the underlying `html::fetch_html`
/// call reports.
pub async fn fetch_html(url: &str) -> Result<String> {
html::fetch_html(url).await
}
/// Crate-root entry point that forwards `url` to [`browser::render_html`] and
/// returns the string it produces.
///
/// NOTE(review): presumably this loads the page in a browser engine, unlike
/// [`fetch_html`] — confirm against the `browser` module.
///
/// # Errors
///
/// Returns whatever [`WebCaptureError`] the underlying `browser::render_html`
/// call reports.
pub async fn render_html(url: &str) -> Result<String> {
browser::render_html(url).await
}
/// Crate-root entry point that forwards `html` and the optional `base_url` to
/// [`markdown::convert_html_to_markdown`] and returns its result unchanged.
///
/// # Errors
///
/// Returns whatever [`WebCaptureError`] the underlying
/// `markdown::convert_html_to_markdown` call reports.
pub fn convert_html_to_markdown(html: &str, base_url: Option<&str>) -> Result<String> {
markdown::convert_html_to_markdown(html, base_url)
}
/// Crate-root entry point that forwards `url` to
/// [`browser::capture_screenshot`] and returns the bytes it produces.
///
/// NOTE(review): the image encoding of the returned bytes is decided by the
/// `browser` module and is not visible here — confirm before documenting it.
///
/// # Errors
///
/// Returns whatever [`WebCaptureError`] the underlying
/// `browser::capture_screenshot` call reports.
pub async fn capture_screenshot(url: &str) -> Result<Vec<u8>> {
browser::capture_screenshot(url).await
}
/// Crate-root entry point that forwards `html` and `base_url` to
/// [`html::convert_relative_urls`] and returns the resulting string.
///
/// The input is only borrowed; the result is a new `String`, which is why the
/// return value is marked `#[must_use]`.
#[must_use]
pub fn convert_relative_urls(html: &str, base_url: &str) -> String {
html::convert_relative_urls(html, base_url)
}
/// Crate-root entry point that forwards `html` to [`html::convert_to_utf8`]
/// and returns the resulting string.
///
/// The input is only borrowed; the result is a new `String`, which is why the
/// return value is marked `#[must_use]`.
#[must_use]
pub fn convert_to_utf8(html: &str) -> String {
html::convert_to_utf8(html)
}
/// Feature toggles for [`convert_html_to_markdown_enhanced`].
///
/// Every flag defaults to `true` (see the `Default` impl).
// The flags are independent on/off switches, so the lint about structs with
// many bools is silenced deliberately.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone)]
pub struct EnhancedOptions {
/// NOTE(review): not read by `convert_html_to_markdown_enhanced` in this
/// file; presumably consumed elsewhere (see the `latex` module) — confirm.
pub extract_latex: bool,
/// When `true`, metadata is extracted from the HTML, header/footer blocks
/// are merged into the Markdown, and the metadata is returned to the caller.
pub extract_metadata: bool,
/// When `true`, the Markdown is passed through
/// `postprocess::post_process_markdown` before being returned.
pub post_process: bool,
/// NOTE(review): not read by `convert_html_to_markdown_enhanced` in this
/// file; presumably consumed elsewhere — confirm.
pub detect_code_language: bool,
}
impl Default for EnhancedOptions {
/// Returns options with every enhancement switched on.
fn default() -> Self {
Self {
extract_latex: true,
extract_metadata: true,
post_process: true,
detect_code_language: true,
}
}
}
/// Output of [`convert_html_to_markdown_enhanced`].
#[derive(Debug, Clone)]
pub struct EnhancedMarkdownResult {
/// The final Markdown text, including any merged metadata blocks and any
/// post-processing that was requested.
pub markdown: String,
/// The extracted metadata; `Some` when `EnhancedOptions::extract_metadata`
/// was `true`, `None` otherwise.
pub metadata: Option<metadata::ArticleMetadata>,
}
/// Converts HTML to Markdown and optionally enriches the result.
///
/// The HTML is first converted with [`markdown::convert_html_to_markdown`].
/// Two flags of `options` then control the extra steps:
///
/// * `extract_metadata` — metadata is extracted from `html`. A non-empty
///   header block is spliced in directly after the first blank line of the
///   Markdown (or prepended when the Markdown contains no blank line), and a
///   non-empty footer block is appended at the very end. The extracted
///   metadata is returned alongside the text.
/// * `post_process` — the Markdown is passed through
///   [`postprocess::post_process_markdown`] with default post-processing
///   options.
///
/// The other flags of [`EnhancedOptions`] are not consulted by this function.
///
/// # Errors
///
/// Propagates any error returned by the base HTML-to-Markdown conversion.
pub fn convert_html_to_markdown_enhanced(
    html: &str,
    base_url: Option<&str>,
    options: &EnhancedOptions,
) -> Result<EnhancedMarkdownResult> {
    let mut document = markdown::convert_html_to_markdown(html, base_url)?;

    let article_meta = options.extract_metadata.then(|| {
        let article = metadata::extract_metadata(html);

        // Header block: goes right after the first paragraph break when there
        // is one, otherwise in front of the whole document.
        let header_lines = metadata::format_metadata_block(&article);
        if !header_lines.is_empty() {
            let header = header_lines.join("\n");
            document = match document.split_once("\n\n") {
                Some((lead, rest)) => format!("{lead}\n\n{header}\n{rest}"),
                None => format!("{header}\n\n{document}"),
            };
        }

        // Footer block: always appended after a blank line.
        let footer_lines = metadata::format_footer_block(&article);
        if !footer_lines.is_empty() {
            document.push_str("\n\n");
            document.push_str(&footer_lines.join("\n"));
        }

        article
    });

    // Post-processing runs last so it also sees the merged metadata blocks.
    let finished = if options.post_process {
        let defaults = postprocess::PostProcessOptions::default();
        postprocess::post_process_markdown(&document, &defaults)
    } else {
        document
    };

    Ok(EnhancedMarkdownResult {
        markdown: finished,
        metadata: article_meta,
    })
}
pub use browser::BrowserEngine;