use anyhow::Result;
use parsoid::prelude::*;
use std::fs;
fn main() -> Result<()> {
let mut entries = fs::read_dir("corpus/")?
.filter_map(|entry| {
let entry = entry.unwrap();
if entry.file_name().to_str().unwrap().ends_with(".html") {
Some(entry.path())
} else {
None
}
})
.collect::<Vec<_>>();
entries.sort();
for entry in entries {
let html = fs::read_to_string(entry)?;
let code = Wikicode::new(&html);
let serialized = code.to_string();
let nodes: Vec<_> = code.descendants().collect();
let mut count = 0;
for template in code.filter_templates()? {
count += 1;
let name = "_parsoid-rs testing";
template.set_param(name, "foo bar baz")?;
template.remove_param(name)?;
}
let links = code.filter_links();
for link in &links {
link.set_target(&link.target());
}
let extlinks = code.filter_external_links();
for extlink in &extlinks {
extlink.set_target(&extlink.target());
}
for category in code.filter_categories() {
let cat = category.category();
category.set_category(&cat);
}
let new_serialized = code.to_string();
if serialized != new_serialized {
let ser_code = Wikicode::new(&serialized);
let ser_temp = ser_code.filter_templates()?;
let new_code = Wikicode::new(&new_serialized);
let new_temp = new_code.filter_templates()?;
for (ser, new) in ser_temp.iter().zip(new_temp.iter()) {
let ser_node = ser
.as_nodes()[0]
.as_element()
.unwrap()
.attributes
.borrow()
.get("data-mw")
.unwrap()
.to_string();
let new_node = new.as_nodes()[0]
.as_element()
.unwrap()
.attributes
.borrow()
.get("data-mw")
.unwrap()
.to_string();
if ser_node != new_node {
println!("{ser_node}");
println!("---");
println!("{new_node}");
println!("---");
}
}
fs::write("old.txt", serialized)?;
fs::write("new.txt", new_serialized)?;
panic!("No match {}", &code.title().unwrap());
}
let link_count = links.len();
println!(
"{}: {} templates, {} links, {} nodes",
&code.title().unwrap(),
count,
link_count,
nodes.len()
);
}
Ok(())
}