use std::collections::HashSet;
use std::fs;
use std::path::PathBuf;
/// Resolve the workspace root: two directory levels above this crate's
/// manifest directory, canonicalized so later joins are absolute.
fn workspace_root() -> PathBuf {
    let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    crate_dir
        .join("../..")
        .canonicalize()
        .expect("workspace root")
}
/// Absolute path to the snippet-extract manifest inside the workspace.
fn manifest_path() -> PathBuf {
    let root = workspace_root();
    root.join("tools/pdfluent-snippet-extract/manifest.toml")
}
/// Absolute path to the directory holding the committed web-example files.
fn web_examples_dir() -> PathBuf {
    let root = workspace_root();
    root.join("crates/pdfluent/tests/web_examples")
}
// Slugs whose `_runs` end-to-end test is expected to execute (not `#[ignore]`d);
// enforced by `naturally_runnable_slugs_match_master_reality` below.
const NATURALLY_RUNNABLE: &[&str] = &[
"compress_pdf_rust",
"convert_pdf_to_docx_rust",
"encrypt_pdf_rust",
"extract_text_pdf_rust",
"fill_pdf_form_rust",
"insert_image_pdf_rust",
"merge_pdfs_rust",
"render_pdf_to_jpeg_rust",
"subset_fonts_rust",
];
// Slugs whose runtime test is deferred; tuple is presumably (slug, reason) —
// only the slug half is read by `every_slug_is_explicitly_classified`.
// Currently empty.
const DEFERRED_RUNTIME: &[(&str, &str)] = &[];
// Example files committed under tests/web_examples/ that have no matching
// [[page]] entry in the manifest; checked for presence on disk by
// `discover_all_slugs_finds_committed_files`.
const PLACEHOLDERS_NOT_IN_MANIFEST: &[&str] = &[
"compress_pdf_rust",
"convert_pdf_to_docx_rust",
"insert_image_pdf_rust",
"render_pdf_to_jpeg_rust",
"subset_fonts_rust",
];
// Legacy slugs whose own runtime test stays ignored because a companion
// example covers the behavior (per the constant's name — the classification
// test only requires each slug to land in exactly one category).
const LEGACY_IGNORED_WITH_COMPANION_COVERAGE: &[&str] = &["render_pdf_to_png_rust"];
/// Extract every `slug = "…"` value found inside a `[[page]]` table of the
/// manifest. This is a deliberately minimal line-oriented parser, not a full
/// TOML reader: it only understands the subset of TOML the manifest uses.
///
/// Returns the slugs in file order; lines outside `[[page]]` tables,
/// comments, and blanks are ignored.
fn parse_manifest_slugs(manifest_src: &str) -> Vec<String> {
    let mut slugs = Vec::new();
    let mut in_page = false;
    for raw in manifest_src.lines() {
        let line = raw.trim();
        // Skip blanks and comments (this also drops commented-out pages).
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        // Entering a page table; any other table header leaves page scope.
        if line.starts_with("[[page]]") {
            in_page = true;
            continue;
        }
        if line.starts_with('[') {
            in_page = false;
            continue;
        }
        if !in_page {
            continue;
        }
        // Accept only the exact key `slug` followed by optional whitespace
        // and `=`. The previous bare prefix match also swallowed keys like
        // `slug_id` and pushed mangled values for them.
        if let Some(rest) = line.strip_prefix("slug") {
            if let Some(value) = rest.trim_start().strip_prefix('=') {
                let unquoted = value.trim().trim_matches('"');
                if !unquoted.is_empty() {
                    slugs.push(unquoted.to_owned());
                }
            }
        }
    }
    slugs
}
/// True when a committed `tests/web_examples/<slug>.rs` file exists.
fn file_exists_for(slug: &str) -> bool {
    let candidate = web_examples_dir().join(format!("{slug}.rs"));
    candidate.exists()
}
/// Read the full text of `tests/web_examples/<slug>.rs`; panics if missing.
fn file_body(slug: &str) -> String {
    let path = web_examples_dir().join(format!("{slug}.rs"));
    fs::read_to_string(path).expect("web_example file readable")
}
// Every slug declared in the manifest must have a committed example file.
#[test]
fn every_manifest_slug_has_a_web_example_file() {
    let manifest_src = fs::read_to_string(manifest_path()).expect("manifest readable");
    let slugs = parse_manifest_slugs(&manifest_src);
    assert!(!slugs.is_empty(), "manifest parsed 0 slugs — parser broken");
    let mut missing = Vec::new();
    for slug in &slugs {
        if !file_exists_for(slug) {
            missing.push(slug.clone());
        }
    }
    assert!(
        missing.is_empty(),
        "manifest entries without a matching tests/web_examples/*.rs: {missing:?}",
    );
}
// The extractor must emit a `_compiles` test into every example file.
#[test]
fn every_web_example_file_has_a_compile_test() {
    discover_all_slugs().into_iter().for_each(|slug| {
        let body = file_body(&slug);
        assert!(
            body.contains("_compiles"),
            "{slug}.rs has no `_compiles` test — extractor contract violated",
        );
    });
}
// Each discovered slug must fall into exactly one of the three categories
// declared at the top of this file — never zero, never more than one.
#[test]
fn every_slug_is_explicitly_classified() {
    let runnable: HashSet<&str> = NATURALLY_RUNNABLE.iter().copied().collect();
    let deferred: HashSet<&str> = DEFERRED_RUNTIME.iter().map(|(slug, _)| *slug).collect();
    let legacy: HashSet<&str> = LEGACY_IGNORED_WITH_COMPANION_COVERAGE
        .iter()
        .copied()
        .collect();
    let problems: Vec<String> = discover_all_slugs()
        .into_iter()
        .filter_map(|slug| {
            let r = runnable.contains(slug.as_str());
            let d = deferred.contains(slug.as_str());
            let l = legacy.contains(slug.as_str());
            let hits = usize::from(r) + usize::from(d) + usize::from(l);
            (hits != 1).then(|| {
                format!(
                    "{slug}: classified in {hits} categories (runnable={r}, deferred={d}, legacy={l})"
                )
            })
        })
        .collect();
    assert!(
        problems.is_empty(),
        "slug classification gaps:\n{}",
        problems.join("\n")
    );
}
// A slug listed as naturally runnable must actually have a `_runs` test in
// its example file, and that test must not be `#[ignore]`d.
#[test]
fn naturally_runnable_slugs_match_master_reality() {
    for slug in NATURALLY_RUNNABLE {
        let body = file_body(slug);
        // Locate the `_runs` test: exact name first, then the form with
        // "_pdf" dropped, finally any `_runs()` occurrence at all.
        let located = body
            .find(&format!("fn {slug}_runs"))
            .or_else(|| {
                let abbreviated = slug.replace("_pdf", "");
                body.find(&format!("fn {abbreviated}_runs"))
            })
            .or_else(|| body.find("_runs()"));
        let Some(idx) = located else {
            panic!(
                "{slug} is listed in NATURALLY_RUNNABLE but has no _runs \
                function in tests/web_examples/{slug}.rs. Either add \
                the test or move the slug to DEFERRED_RUNTIME."
            )
        };
        // Inspect the 120 bytes before the match for an ignore attribute.
        let window_start = idx.saturating_sub(120);
        let preceding = &body[window_start..idx];
        assert!(
            !preceding.contains("#[ignore"),
            "{slug} is in NATURALLY_RUNNABLE but its _runs test is `#[ignore]`d. \
            Either wire it (remove ignore) or move it to DEFERRED_RUNTIME.",
        );
    }
}
// Not an assertion — prints a one-line coverage report when run with
// `--nocapture` so reviewers can eyeball the category counts.
#[test]
fn coverage_summary() {
    let total = discover_all_slugs().len();
    println!(
        "e2e-parity coverage: total={}, naturally_runnable={}, deferred={}, legacy_ignored={}, placeholders_not_in_manifest={}",
        total,
        NATURALLY_RUNNABLE.len(),
        DEFERRED_RUNTIME.len(),
        LEGACY_IGNORED_WITH_COMPANION_COVERAGE.len(),
        PLACEHOLDERS_NOT_IN_MANIFEST.len(),
    );
}
/// Enumerate the slug for every committed `*.rs` file under
/// tests/web_examples/, excluding `mod.rs`. Sorted for deterministic output.
///
/// Panics if the directory or an entry is unreadable (test-only helper).
fn discover_all_slugs() -> Vec<String> {
    let dir = web_examples_dir();
    let mut out = Vec::new();
    for entry in fs::read_dir(&dir).expect("web_examples readable") {
        let entry = entry.expect("dir entry");
        let path: PathBuf = entry.path();
        // Non-UTF-8 names cannot be slugs; skip them.
        let Some(file_name) = path.file_name().and_then(|n| n.to_str()) else {
            continue;
        };
        // strip_suffix removes exactly one ".rs". The previous
        // trim_end_matches(".rs") stripped *repeated* suffixes, so a file
        // named "x.rs.rs" produced slug "x", which no longer maps back to
        // the real file name used by file_exists_for/file_body.
        let Some(slug) = file_name.strip_suffix(".rs") else {
            continue;
        };
        if slug == "mod" {
            continue;
        }
        out.push(slug.to_owned());
    }
    out.sort();
    out
}
// Unit test for the minimal manifest parser: slugs inside [[page]] tables
// are extracted; commented-out pages are ignored.
#[test]
fn manifest_parser_extracts_slugs() {
    let src = r#"
# comment
[[page]]
url = "https://x/a"
slug = "a_rust"
[[page]]
url = "https://x/b"
slug = "b_rust"
scope = "article"
# commented-out:
# [[page]]
# url = "https://x/c"
# slug = "c_rust"
"#;
    let parsed = parse_manifest_slugs(src);
    let expected: Vec<String> = ["a_rust", "b_rust"].iter().map(|s| s.to_string()).collect();
    assert_eq!(parsed, expected);
}
// Sanity check that directory discovery sees both the placeholder files and
// a core set of manifest-backed example files.
#[test]
fn discover_all_slugs_finds_committed_files() {
    let slugs = discover_all_slugs();
    let has = |name: &str| slugs.iter().any(|s| s == name);
    for &expected in PLACEHOLDERS_NOT_IN_MANIFEST {
        assert!(
            has(expected),
            "{expected} missing from tests/web_examples/",
        );
    }
    let core = [
        "encrypt_pdf_rust",
        "extract_text_pdf_rust",
        "fill_pdf_form_rust",
        "merge_pdfs_rust",
        "render_pdf_to_png_rust",
    ];
    for expected in core {
        assert!(has(expected), "{expected} missing");
    }
}
// The manifest path must resolve on this checkout and contain at least one
// [[page]] table.
#[test]
fn manifest_path_resolves_and_is_readable() {
    let path = manifest_path();
    let src = match fs::read_to_string(&path) {
        Ok(contents) => contents,
        Err(e) => panic!("manifest at {}: {e}", path.display()),
    };
    assert!(
        src.contains("[[page]]"),
        "manifest at {} has no [[page]] entries",
        path.display(),
    );
}