use std::fs;
use std::path::PathBuf;
use web_capture::extract_images::{
extract_and_save_images, extract_base64_to_buffers, has_base64_images, strip_base64_images,
};
// Base64-encoded bytes of a small PNG fixture; the tests below splice it into
// `data:image/png;base64,...` URIs inside markdown image links.
const TINY_PNG: &str = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==";
/// Creates an empty scratch directory under the system temp dir and returns its path.
///
/// The directory name combines the current process id with a process-wide
/// counter, so repeated calls within one process yield distinct paths.
///
/// # Panics
///
/// Panics if the directory cannot be created.
fn create_temp_dir() -> PathBuf {
    // Only the uniqueness of each fetched value matters, hence `Relaxed`.
    static NEXT_ID: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let id = NEXT_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let name = format!("extract-images-test-{}-{id}", std::process::id());
    let dir = std::env::temp_dir().join(name);

    // Best-effort removal of anything already at that path; errors
    // (for example, the path not existing) are deliberately ignored.
    let _ = fs::remove_dir_all(&dir);
    fs::create_dir_all(&dir).unwrap();
    dir
}
/// Recursively removes `dir` and everything inside it.
///
/// Removal is best-effort: any error (including `dir` not existing) is
/// ignored so that cleanup never turns a passing test into a failure.
///
/// Takes `&Path` rather than `&PathBuf` so both borrowed paths and
/// `&PathBuf` values (via deref coercion) are accepted.
fn cleanup(dir: &std::path::Path) {
    let _ = fs::remove_dir_all(dir);
}
/// A single inline PNG data URI is saved under `images/` and the markdown
/// link is rewritten to point at the saved file.
#[test]
fn test_extract_single_png() {
    let dir = create_temp_dir();
    let md = format!("# Hello\n\n![test image](data:image/png;base64,{TINY_PNG})\n\nEnd.");

    let result = extract_and_save_images(&md, &dir, "images").unwrap();
    let rewritten = &result.markdown;

    // Markdown side: one image extracted, link now targets the images dir.
    assert_eq!(result.extracted, 1);
    assert!(rewritten.contains("![test image](images/image-"));
    assert!(rewritten.contains(".png)"));
    assert!(!rewritten.contains("data:image"));

    // Disk side: exactly one file exists in the images dir.
    let images_dir = dir.join("images");
    assert!(images_dir.exists());
    let entries = fs::read_dir(&images_dir).unwrap().collect::<Vec<_>>();
    assert_eq!(entries.len(), 1);

    // The file is named `image-*` with a `.png` extension (case-insensitive).
    let img_path = entries[0].as_ref().unwrap().path();
    let filename = img_path.file_name().unwrap().to_str().unwrap();
    assert!(filename.starts_with("image-"));
    let has_png_ext = img_path
        .extension()
        .is_some_and(|ext| ext.eq_ignore_ascii_case("png"));
    assert!(has_png_ext);

    // The written bytes start with the first two bytes of the PNG signature.
    let bytes = fs::read(&img_path).unwrap();
    assert!(!bytes.is_empty());
    assert_eq!(bytes[0], 0x89);
    assert_eq!(bytes[1], b'P');

    cleanup(&dir);
}
/// Two links carrying the same image payload are both counted as extracted
/// and are rewritten to filenames containing the same hash.
#[test]
fn test_extract_duplicate_images_same_hash() {
    let dir = create_temp_dir();
    let md =
        format!("![a](data:image/png;base64,{TINY_PNG})\n![b](data:image/png;base64,{TINY_PNG})");

    let result = extract_and_save_images(&md, &dir, "images").unwrap();
    assert_eq!(result.extracted, 2);

    // Collect the hex hash portion of every rewritten `image-<hash>.png` link.
    let hash_pattern = regex::Regex::new(r"image-([0-9a-f]{8,})\.png").unwrap();
    let mut hashes = Vec::new();
    for caps in hash_pattern.captures_iter(&result.markdown) {
        hashes.push(caps[1].to_string());
    }

    assert_eq!(hashes.len(), 2);
    assert_eq!(hashes[0], hashes[1]);

    cleanup(&dir);
}
/// A caller-supplied images directory name is used both in the rewritten
/// link and for the on-disk output directory.
#[test]
fn test_custom_images_dir() {
    let dir = create_temp_dir();
    let md = format!("![img](data:image/png;base64,{TINY_PNG})");

    let result = extract_and_save_images(&md, &dir, "my-images").unwrap();

    assert_eq!(result.extracted, 1);
    assert!(result.markdown.contains("my-images/image-"));

    // The custom directory exists and holds exactly one entry.
    let custom_dir = dir.join("my-images");
    assert!(custom_dir.exists());
    let entry_count = fs::read_dir(&custom_dir).unwrap().count();
    assert_eq!(entry_count, 1);

    cleanup(&dir);
}
/// Markdown without any images passes through unchanged and no images
/// directory is created.
#[test]
fn test_no_base64_images() {
    let dir = create_temp_dir();
    let md = "# No images\n\nJust text.";

    let result = extract_and_save_images(md, &dir, "images").unwrap();

    assert_eq!(result.extracted, 0);
    assert_eq!(result.markdown, md);

    // Nothing was extracted, so the output directory must not exist.
    let images_dir = dir.join("images");
    assert!(!images_dir.exists());

    cleanup(&dir);
}
/// Image links that point at remote URLs or relative local paths are not
/// extracted and the markdown is returned unchanged.
#[test]
fn test_preserves_remote_urls() {
    let dir = create_temp_dir();
    let md = "![remote](https://example.com/img.png)\n![local](./local.png)";

    let result = extract_and_save_images(md, &dir, "images").unwrap();

    assert_eq!(result.extracted, 0);
    assert_eq!(result.markdown, md);

    cleanup(&dir);
}
/// The alt text of an extracted image is kept verbatim in the rewritten link.
#[test]
fn test_preserves_alt_text() {
    let dir = create_temp_dir();
    let md = format!("![screenshot of app](data:image/png;base64,{TINY_PNG})");

    let result = extract_and_save_images(&md, &dir, "images").unwrap();
    let rewritten = &result.markdown;

    // The whole output is a single link: original alt text, new `.png` target.
    assert!(rewritten.starts_with("![screenshot of app](images/image-"));
    assert!(rewritten.ends_with(".png)"));

    cleanup(&dir);
}
/// A base64 `image/svg+xml` data URI is saved with an `.svg` extension and
/// the saved file contains the decoded SVG markup.
#[test]
fn test_svg_data_uri() {
    let dir = create_temp_dir();

    // Encode a small SVG document so it can be embedded as a data URI.
    let svg = r#"<svg xmlns="http://www.w3.org/2000/svg" width="1" height="1"><rect fill="red" width="1" height="1"/></svg>"#;
    let svg_b64 =
        base64::Engine::encode(&base64::engine::general_purpose::STANDARD, svg.as_bytes());
    let md = format!("![icon](data:image/svg+xml;base64,{svg_b64})");

    let result = extract_and_save_images(&md, &dir, "images").unwrap();

    assert_eq!(result.extracted, 1);
    assert!(result.markdown.contains(".svg)"));

    // Exactly one file was written, and it is readable as SVG text.
    let images_dir = dir.join("images");
    let entries = fs::read_dir(&images_dir).unwrap().collect::<Vec<_>>();
    assert_eq!(entries.len(), 1);
    let saved_path = entries[0].as_ref().unwrap().path();
    let content = fs::read_to_string(saved_path).unwrap();
    assert!(content.contains("<svg"));

    cleanup(&dir);
}
/// Markdown containing a base64 PNG data URI is reported as having base64 images.
#[test]
fn test_has_base64_images_true() {
    let md = format!("![img](data:image/png;base64,{TINY_PNG})");
    let detected = has_base64_images(&md);
    assert!(detected);
}
/// Neither a remote-URL image link nor an empty string counts as containing
/// base64 images.
#[test]
fn test_has_base64_images_false() {
    let remote_only = "![img](https://example.com/img.png)";
    let empty = "";

    assert!(!has_base64_images(remote_only));
    assert!(!has_base64_images(empty));
}
/// In-memory extraction returns one image buffer with an `image-*.png`
/// filename and rewrites the markdown to reference it under `images/`.
#[test]
fn test_extract_base64_to_buffers() {
    let md = format!("# Hello\n\n![test image](data:image/png;base64,{TINY_PNG})\n\nEnd.");

    let result = extract_base64_to_buffers(&md, "images").unwrap();
    assert_eq!(result.images.len(), 1);

    // Inspect the single returned image: name, extension, and payload.
    let image = &result.images[0];
    assert!(image.filename.starts_with("image-"));
    let has_png_ext = std::path::Path::new(&image.filename)
        .extension()
        .is_some_and(|ext| ext.eq_ignore_ascii_case("png"));
    assert!(has_png_ext);
    assert!(!image.data.is_empty());

    // The markdown links to that exact filename and carries no data URI.
    let expected_link = format!("images/{}", image.filename);
    assert!(result.markdown.contains(&expected_link));
    assert!(!result.markdown.contains("data:image"));
}
/// In-memory extraction uses the caller-supplied directory name as the
/// prefix of the rewritten link.
#[test]
fn test_extract_base64_to_buffers_custom_dir() {
    let md = format!("![img](data:image/png;base64,{TINY_PNG})");

    let result = extract_base64_to_buffers(&md, "assets").unwrap();
    let rewritten = &result.markdown;

    assert!(rewritten.contains("assets/image-"));
}
/// Stripping a base64 image that has alt text leaves an italic
/// `*[image: <alt>]*` placeholder in its place.
#[test]
fn test_strip_base64_images_with_alt() {
    let md = format!("![my image](data:image/png;base64,{TINY_PNG})");

    let result = strip_base64_images(&md);

    assert_eq!(result.stripped, 1);
    assert_eq!(result.markdown, "*[image: my image]*");
    assert!(!result.markdown.contains("data:image"));
}
/// Stripping a base64 image with empty alt text still leaves some visible
/// marker in the markdown instead of silently removing the image.
#[test]
fn test_strip_base64_images_empty_alt_leaves_visible_placeholder() {
    let md = format!("Hi.\n\n![](data:image/png;base64,{TINY_PNG})\n\nBye.\n");

    let result = strip_base64_images(&md);
    let text = &result.markdown;

    assert_eq!(result.stripped, 1);
    assert!(!text.contains("data:image"));

    // Either an image-link opener or an `[image...` placeholder is acceptable.
    let has_placeholder = text.contains("![") || text.contains("[image");
    assert!(
        has_placeholder,
        "stripping must leave a visible placeholder; got:\n{}",
        text
    );
}
/// Stripping leaves a remote-URL image link untouched and reports zero
/// stripped images.
#[test]
fn test_strip_base64_preserves_remote_urls() {
    let md = "![remote](https://example.com/img.png)";

    let result = strip_base64_images(md);

    assert_eq!(result.stripped, 0);
    assert_eq!(result.markdown, md);
}
/// With one base64 image and one remote image, only the base64 image is
/// replaced by a placeholder; the remote URL survives.
#[test]
fn test_strip_base64_multiple_images() {
    let md = format!("![a](data:image/png;base64,{TINY_PNG})\n![b](https://example.com/img.png)");

    let result = strip_base64_images(&md);
    let text = &result.markdown;

    assert_eq!(result.stripped, 1);
    assert!(text.contains("*[image: a]*"));
    assert!(text.contains("https://example.com/img.png"));
}