#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
/// `is_document_file` must accept the listed document and image paths and
/// reject source-code and manifest paths.
#[test]
fn test_is_document_file() {
    // Paths expected to be classified as documents; `photo.JPEG` covers an
    // upper-case extension.
    let accepted = [
        "docs/spec.pdf",
        "diagram.svg",
        "screenshot.png",
        "photo.jpg",
        "photo.JPEG",
        "README.md",
        "notes.txt",
        "doc.rst",
        "doc.adoc",
    ];
    for name in accepted {
        assert!(
            is_document_file(Path::new(name)),
            "expected a document file: {}",
            name
        );
    }

    // Paths expected to be classified as non-documents.
    let rejected = ["main.rs", "lib.py", "Cargo.toml"];
    for name in rejected {
        assert!(
            !is_document_file(Path::new(name)),
            "expected a non-document file: {}",
            name
        );
    }
}
/// An SVG with `<text>` and `<tspan>` content yields one `Svg` chunk whose
/// text contains both strings.
#[test]
fn test_extract_svg_with_text() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("diagram.svg");
    let markup = r#"<svg><text x="10" y="20">Hello World</text><tspan>Sub text</tspan></svg>"#;
    std::fs::write(&path, markup).unwrap();

    let chunks = extract_svg(&path, "diagram.svg", "abc123").unwrap();

    assert_eq!(chunks.len(), 1);
    let only = &chunks[0];
    assert!(only.text_content.contains("Hello World"));
    assert!(only.text_content.contains("Sub text"));
    assert_eq!(only.doc_type, DocumentType::Svg);
}
/// An SVG containing only a `<rect>` still yields one chunk: its text
/// contains "no text content" and its extraction quality is 0.2.
#[test]
fn test_extract_svg_no_text() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("empty.svg");
    let markup = r#"<svg><rect width="100" height="100"/></svg>"#;
    std::fs::write(&path, markup).unwrap();

    let chunks = extract_svg(&path, "empty.svg", "def456").unwrap();

    assert_eq!(chunks.len(), 1);
    let only = &chunks[0];
    assert!(only.text_content.contains("no text content"));
    assert_eq!(only.extraction_quality, 0.2);
}
/// Both the `<title>` text and the `<text>` content of an SVG must appear in
/// the single extracted chunk.
#[test]
fn test_extract_svg_with_title() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("titled.svg");
    let markup = r#"<svg><title>Architecture Diagram</title><text>Node A</text></svg>"#;
    std::fs::write(&path, markup).unwrap();

    let chunks = extract_svg(&path, "titled.svg", "ghi789").unwrap();

    assert_eq!(chunks.len(), 1);
    let text = &chunks[0].text_content;
    assert!(text.contains("Architecture Diagram"));
    assert!(text.contains("Node A"));
}
/// Image extraction yields one `Image` chunk with quality 0.3 whose text
/// contains the file name and the directory of the relative path.
#[test]
fn test_extract_image_metadata() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("screenshot.png");
    // The bytes are not a real PNG; the test only inspects path-derived text.
    std::fs::write(&path, b"fake png data").unwrap();

    // The relative path deliberately carries a directory that the on-disk
    // temp file name does not.
    let rel_path = "docs/screenshots/screenshot.png";
    let chunks = extract_image_metadata(&path, rel_path, "hash1").unwrap();

    assert_eq!(chunks.len(), 1);
    let only = &chunks[0];
    assert!(only.text_content.contains("screenshot.png"));
    assert!(only.text_content.contains("docs/screenshots"));
    assert_eq!(only.extraction_quality, 0.3);
    assert_eq!(only.doc_type, DocumentType::Image);
}
/// Markdown with headings is split into at least two chunks; the first two
/// carry their heading and the paragraph that follows it.
#[test]
fn test_extract_markdown_with_headings() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("doc.md");
    // One top-level heading followed by two second-level sections.
    let markdown = concat!(
        "# Title\n\n",
        "Intro paragraph.\n\n",
        "## Section A\n\n",
        "Content A.\n\n",
        "## Section B\n\n",
        "Content B.\n",
    );
    std::fs::write(&path, markdown).unwrap();

    let chunks = extract_markdown(&path, "doc.md", "hash2").unwrap();

    // Only the first two chunks are inspected, so at least two are required.
    assert!(chunks.len() >= 2);

    let title = &chunks[0];
    assert_eq!(title.section_heading.as_deref(), Some("Title"));
    assert!(title.text_content.contains("Intro paragraph"));

    let section_a = &chunks[1];
    assert_eq!(section_a.section_heading.as_deref(), Some("Section A"));
    assert!(section_a.text_content.contains("Content A"));
}
/// Markdown without any heading yields one chunk that has no section heading.
#[test]
fn test_extract_markdown_no_headings() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("flat.md");
    let body = "Just some plain text\nwith no headings.\n";
    std::fs::write(&path, body).unwrap();

    let chunks = extract_markdown(&path, "flat.md", "hash3").unwrap();

    assert_eq!(chunks.len(), 1);
    let only = &chunks[0];
    assert!(only.text_content.contains("plain text"));
    assert!(only.section_heading.is_none());
}
/// A zero-byte Markdown file extracts successfully and yields no chunks.
#[test]
fn test_extract_markdown_empty() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("empty.md");
    std::fs::write(&path, "").unwrap();

    let extracted = extract_markdown(&path, "empty.md", "hash4").unwrap();

    assert!(extracted.is_empty());
}
/// A short text file yields one `PlainText` chunk containing its content.
#[test]
fn test_extract_plaintext() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("notes.txt");
    let body = "Line 1\nLine 2\nLine 3\n";
    std::fs::write(&path, body).unwrap();

    let chunks = extract_plaintext(&path, "notes.txt", "hash5").unwrap();

    assert_eq!(chunks.len(), 1);
    let only = &chunks[0];
    assert!(only.text_content.contains("Line 1"));
    assert_eq!(only.doc_type, DocumentType::PlainText);
}
/// A text file holding only spaces and newlines yields no chunks.
#[test]
fn test_extract_plaintext_empty() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("empty.txt");
    // Whitespace-only content.
    let blank = " \n \n";
    std::fs::write(&path, blank).unwrap();

    let extracted = extract_plaintext(&path, "empty.txt", "hash6").unwrap();

    assert!(extracted.is_empty());
}
/// Content longer than `MAX_CHUNK_SIZE` is split into several chunks, each
/// at most `MAX_CHUNK_SIZE + 100` bytes long.
#[test]
fn test_split_into_chunks_large_content() {
    // 500 numbered lines; the assertion below confirms this exceeds one chunk.
    let content: String = (0..500)
        .map(|i| format!("Line {i}: This is a test line with some content.\n"))
        .collect();
    assert!(content.len() > MAX_CHUNK_SIZE);

    let chunks = split_into_chunks(&content, "big.txt", DocumentType::PlainText, "hash7", 1.0);
    assert!(chunks.len() > 1);

    // The test tolerates a 100-byte overshoot per chunk.
    // NOTE(review): presumably because splitting lands on line boundaries —
    // confirm against `split_into_chunks`.
    let limit = MAX_CHUNK_SIZE + 100;
    for piece in chunks.iter() {
        assert!(piece.text_content.len() <= limit);
    }
}
/// Short input passes through `truncate_to_max_chunk` unchanged; oversized
/// input comes back no longer than `MAX_CHUNK_SIZE` bytes.
#[test]
fn test_truncate_to_max_chunk() {
    // Input well under the limit is returned as-is.
    let small = "hello world";
    assert_eq!(truncate_to_max_chunk(small), "hello world");

    // "a " is two bytes, so this input is twice `MAX_CHUNK_SIZE` bytes long.
    let oversized = "a ".repeat(MAX_CHUNK_SIZE);
    let clipped = truncate_to_max_chunk(&oversized);
    assert!(clipped.len() <= MAX_CHUNK_SIZE);
}
/// `extract_document` succeeds for SVG, Markdown, plain-text and PNG files
/// and returns an error for a Rust source file.
#[test]
fn test_extract_document_dispatcher() {
    let tmp = tempfile::tempdir().unwrap();

    // (file name, file contents, content hash) for inputs that must succeed.
    let supported = [
        ("test.svg", "<svg><text>Hello</text></svg>", "h1"),
        ("test.md", "# Hello\nWorld", "h2"),
        ("test.txt", "Hello", "h3"),
        ("test.png", "PNG", "h4"),
    ];
    for (name, contents, hash) in supported {
        let path = tmp.path().join(name);
        std::fs::write(&path, contents).unwrap();
        assert!(
            extract_document(&path, name, hash).is_ok(),
            "extraction should succeed for {}",
            name
        );
    }

    // A `.rs` file must be rejected by the dispatcher.
    let source_path = tmp.path().join("test.rs");
    std::fs::write(&source_path, "fn main() {}").unwrap();
    assert!(extract_document(&source_path, "test.rs", "h5").is_err());
}
/// Exercises `extract_pdf` on a fake PDF file.
///
/// Without the `doc-indexing` feature the call must succeed and return a
/// single chunk whose text mentions `doc-indexing`, with extraction quality
/// 0.1. With the feature enabled nothing is asserted about the result; the
/// call only has to return without panicking.
#[test]
fn test_pdf_without_feature() {
    let dir = tempfile::tempdir().unwrap();
    let pdf_path = dir.path().join("test.pdf");
    // Only a PDF-looking header is written; the body is not a valid document.
    let mut f = std::fs::File::create(&pdf_path).unwrap();
    f.write_all(b"%PDF-1.4 fake").unwrap();

    let result = extract_pdf(&pdf_path, "test.pdf", "hashpdf");

    #[cfg(not(feature = "doc-indexing"))]
    {
        let chunks = result.unwrap();
        assert_eq!(chunks.len(), 1);
        assert!(chunks[0].text_content.contains("doc-indexing"));
        assert_eq!(chunks[0].extraction_quality, 0.1);
    }

    // With the feature on, the block above is compiled out. Consume `result`
    // explicitly so the build does not emit an `unused_variables` warning.
    #[cfg(feature = "doc-indexing")]
    {
        let _ = result;
    }
}
}