use facet_html_dom::Html;
use std::path::Path;
/// File names of fixtures that `html_roundtrip_test` skips.
///
/// Matching is done on the bare file name (not the full path). The skip message
/// printed by the test attributes these to a stack overflow tracked in issue #1582.
const SKIP_STACK_OVERFLOW: &[&str] = &[
    "https_fasterthanli.me.html",
    "https_nltk.org_howto_corpus.html",
    "https_stackoverflow.com_questions_53390843_creating-corpus-from-multiple-html-text-files.html",
    "https_en.wikipedia.org_wiki_Markup_language.html",
    "https_developer.mozilla.org_en-US_docs_Web_HTML.html",
    "https_markdownguide.org_basic-syntax.html",
    "https_info.arxiv.org_about_accessible_HTML.html",
    "https_w3.org_TR_2010_WD-html-markup-20101019.html",
];
/// Round-trips one HTML fixture through `facet_html` and checks that serialization is stable.
///
/// The fixture is parsed into [`Html`], serialized, parsed again from that output, and
/// serialized a second time. The two serialized strings must be equal.
///
/// Fixtures whose file name appears in `SKIP_STACK_OVERFLOW` are reported on stderr and
/// return `Ok(())` before the file is read. A path with no file name, or a file name that
/// is not valid UTF-8, is never skipped.
///
/// # Errors
///
/// Returns an error if the file cannot be read into a string, or if any parse or
/// serialize step fails.
///
/// # Panics
///
/// Panics via `assert_eq!` when the first and second serializations differ.
fn html_roundtrip_test(path: &Path) -> datatest_stable::Result<()> {
    if let Some(filename) = path.file_name().and_then(|f| f.to_str())
        && SKIP_STACK_OVERFLOW.contains(&filename)
    {
        eprintln!("Skipping {filename} (causes stack overflow, see issue #1582)");
        return Ok(());
    }

    // First pass: original fixture text -> DOM -> serialized text.
    let html_str = std::fs::read_to_string(path)?;
    let parsed: Html = facet_html::from_str(&html_str)
        .map_err(|e| format!("Failed to parse HTML from {}: {}", path.display(), e))?;
    let serialized =
        facet_html::to_string(&parsed).map_err(|e| format!("Failed to serialize HTML: {e}"))?;

    // Second pass: feed the serializer's own output back through parse + serialize.
    // The comparison below is between the two serialized strings, not against the
    // original fixture text.
    let reparsed: Html = facet_html::from_str(&serialized)
        .map_err(|e| format!("Failed to re-parse serialized HTML: {e}"))?;
    let reserialized = facet_html::to_string(&reparsed)
        .map_err(|e| format!("Failed to serialize HTML again: {e}"))?;

    assert_eq!(
        serialized,
        reserialized,
        "Serialized HTML should be identical after roundtrip for {}",
        path.display()
    );
    Ok(())
}
// Hands `html_roundtrip_test` to the `datatest_stable` harness, with `tests/fixtures` as the
// root and a pattern that matches paths ending in `.html`.
datatest_stable::harness! {
{ test = html_roundtrip_test, root = "tests/fixtures", pattern = r".*\.html$" },
}