use serde::Serialize;
/// Result container pairing text and HTML strings for content and comments
/// with a [`Metadata`] record.
///
/// `Default` yields empty strings and a default `Metadata`. The type is
/// `#[non_exhaustive]`, so code outside the defining crate cannot build it
/// with a struct literal or destructure it exhaustively.
///
/// When the `markdown` feature is enabled, the two `*_html` fields can be
/// converted to Markdown through the `*_markdown` methods.
#[derive(Debug, Clone, Default)]
#[non_exhaustive]
pub struct ExtractResult {
/// Content as text. Presumably the plain-text counterpart of
/// `content_html`; confirm against the code that fills this struct.
pub content_text: String,
/// Comments as text. Presumably the plain-text counterpart of
/// `comments_html`; confirm against the code that fills this struct.
pub comments_text: String,
/// Content as HTML; this is the input of the `content_markdown*` methods.
pub content_html: String,
/// Comments as HTML; this is the input of the `comments_markdown*` methods.
pub comments_html: String,
/// Metadata record carried alongside the content and comments.
pub metadata: Metadata,
}
#[cfg(feature = "markdown")]
use html2markdown::Options as MarkdownOptions;
#[cfg(feature = "markdown")]
impl ExtractResult {
    /// Converts `content_html` to Markdown with `html2markdown::convert`.
    ///
    /// Returns a freshly allocated `String`; `self` is left untouched.
    pub fn content_markdown(&self) -> String {
        let html = &self.content_html;
        html2markdown::convert(html)
    }

    /// Converts `content_html` to Markdown with `html2markdown::convert_with`,
    /// forwarding the caller-supplied `options` to the converter.
    pub fn content_markdown_with(&self, options: &MarkdownOptions) -> String {
        let html = &self.content_html;
        html2markdown::convert_with(html, options)
    }

    /// Converts `comments_html` to Markdown with `html2markdown::convert`.
    ///
    /// Returns a freshly allocated `String`; `self` is left untouched.
    pub fn comments_markdown(&self) -> String {
        let html = &self.comments_html;
        html2markdown::convert(html)
    }

    /// Converts `comments_html` to Markdown with `html2markdown::convert_with`,
    /// forwarding the caller-supplied `options` to the converter.
    pub fn comments_markdown_with(&self, options: &MarkdownOptions) -> String {
        let html = &self.comments_html;
        html2markdown::convert_with(html, options)
    }
}
/// Flat, serializable record of metadata fields.
///
/// Derives `serde::Serialize`, so the field names below are the serialized
/// keys. `Default` yields empty strings, empty vectors and `date == None`.
/// The type is `#[non_exhaustive]`, so code outside the defining crate cannot
/// build it with a struct literal.
///
/// NOTE(review): nothing in this file populates these fields; the per-field
/// descriptions are inferred from the field names and should be confirmed
/// against the code that fills this struct.
#[derive(Debug, Clone, Default, Serialize)]
#[non_exhaustive]
pub struct Metadata {
/// Title (presumably of the extracted document).
pub title: String,
/// Author name(s) as a single string.
pub author: String,
/// URL (presumably of the source document).
pub url: String,
/// Hostname (presumably the host part of `url`; TODO confirm).
pub hostname: String,
/// Description or summary text.
pub description: String,
/// Site name (presumably the publishing site).
pub sitename: String,
/// Calendar date without time or timezone; `None` when no date is set.
pub date: Option<chrono::NaiveDate>,
/// List of category strings.
pub categories: Vec<String>,
/// List of tag strings.
pub tags: Vec<String>,
/// Identifier string; how it is derived is not visible in this file.
pub id: String,
/// Fingerprint string; the algorithm producing it is not visible in this file.
pub fingerprint: String,
/// License information as a string.
pub license: String,
/// Language as a string; the code format is not visible here (TODO confirm).
pub language: String,
/// Image reference as a string (presumably a URL; TODO confirm).
pub image: String,
/// Page type as a string; the set of valid values is not visible in this file.
pub page_type: String,
}
#[cfg(test)]
#[cfg(feature = "markdown")]
mod markdown_tests {
    use super::*;

    // Builds an otherwise-default result whose `content_html` is `html`.
    fn content_fixture(html: &str) -> ExtractResult {
        ExtractResult {
            content_html: String::from(html),
            ..ExtractResult::default()
        }
    }

    // Builds an otherwise-default result whose `comments_html` is `html`.
    fn comments_fixture(html: &str) -> ExtractResult {
        ExtractResult {
            comments_html: String::from(html),
            ..ExtractResult::default()
        }
    }

    // A heading and bold text in the content HTML show up as Markdown syntax.
    #[test]
    fn test_content_markdown() {
        let extracted = content_fixture("<h1>Title</h1><p>Hello <strong>world</strong></p>");
        let markdown = extracted.content_markdown();
        assert!(markdown.contains("# Title"));
        assert!(markdown.contains("**world**"));
    }

    // Emphasis in the comments HTML shows up as Markdown emphasis.
    #[test]
    fn test_comments_markdown() {
        let extracted = comments_fixture("<p>A <em>great</em> comment</p>");
        let markdown = extracted.comments_markdown();
        assert!(markdown.contains("*great*"));
    }

    // A default (all-empty) result converts to empty Markdown on both sides.
    #[test]
    fn test_empty_html_produces_empty_markdown() {
        let empty = ExtractResult::default();
        assert_eq!(empty.content_markdown(), "");
        assert_eq!(empty.comments_markdown(), "");
    }

    // Caller-supplied converter options (here: the bullet character) are honoured.
    #[test]
    fn test_content_markdown_with_custom_options() {
        let dash_bullets = html2markdown::Options::new().with_bullet('-');
        let extracted = content_fixture("<ul><li>one</li><li>two</li></ul>");
        let markdown = extracted.content_markdown_with(&dash_bullets);
        assert!(markdown.contains("- one"));
        assert!(markdown.contains("- two"));
    }
}