use std::fs::File;
use std::io::Write;
use std::path::Path;
use tempfile::tempdir;
use zip::CompressionMethod;
use zip::ZipWriter;
use zip::write::SimpleFileOptions;
use text_analysis::{extract_text_from_docx, extract_text_from_odt};
/// Writes a minimal DOCX-shaped ZIP archive to `target`.
///
/// The archive holds a `word/` directory entry and a Deflate-compressed
/// `word/document.xml` with one paragraph whose single run contains `body`.
/// `body` is treated as plain text: `&`, `<` and `>` are replaced with XML
/// entity references so the generated document stays well-formed.
///
/// # Panics
///
/// Panics if the file cannot be created or any ZIP operation fails.
fn write_minimal_docx(target: &Path, body: &str) {
    // `&` must be replaced first; otherwise the ampersands introduced by the
    // other two replacements would be escaped a second time.
    let escaped_body = body
        .replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;");

    let file = File::create(target).expect("create docx file");
    let mut zip = ZipWriter::new(file);
    let deflated = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated);
    zip.add_directory("word", deflated).expect("add word dir");

    let document_xml = format!(
        r##"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p><w:r><w:t>{}</w:t></w:r></w:p>
</w:body>
</w:document>"##,
        escaped_body
    );

    zip.start_file("word/document.xml", deflated)
        .expect("start document.xml");
    zip.write_all(document_xml.as_bytes())
        .expect("write document.xml");
    zip.finish().expect("finish docx zip");
}
/// Writes a minimal ODT-shaped ZIP archive to `target`.
///
/// The archive holds a single Deflate-compressed `content.xml` entry whose
/// `office:text` section contains one `text:p` paragraph with `body`.
/// `body` is treated as plain text: `&`, `<` and `>` are replaced with XML
/// entity references so the generated document stays well-formed.
///
/// # Panics
///
/// Panics if the file cannot be created or any ZIP operation fails.
fn write_minimal_odt(target: &Path, body: &str) {
    // `&` must be replaced first; otherwise the ampersands introduced by the
    // other two replacements would be escaped a second time.
    let escaped_body = body
        .replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;");

    let file = File::create(target).expect("create odt file");
    let mut zip = ZipWriter::new(file);
    let deflated = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated);

    let content_xml = format!(
        r##"<?xml version="1.0" encoding="UTF-8"?>
<office:document-content
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
office:version="1.2">
<office:body>
<office:text>
<text:p>{}</text:p>
</office:text>
</office:body>
</office:document-content>"##,
        escaped_body
    );

    zip.start_file("content.xml", deflated)
        .expect("start content.xml");
    zip.write_all(content_xml.as_bytes())
        .expect("write content.xml");
    zip.finish().expect("finish odt zip");
}
/// A single-paragraph DOCX written by `write_minimal_docx` must yield exactly
/// the paragraph text when passed through `extract_text_from_docx`.
#[test]
fn docx_roundtrip_minimal_text() {
    let expected = "Hello DOCX";

    // The temporary directory guard must outlive every use of the path below.
    let workspace = tempdir().expect("create tempdir");
    let docx_path = workspace.path().join("sample.docx");
    write_minimal_docx(&docx_path, expected);

    let actual = extract_text_from_docx(&docx_path).expect("extract text from docx");
    assert_eq!(actual, expected, "DOCX extraction should match input body");
}
/// A single-paragraph ODT written by `write_minimal_odt` must yield exactly
/// the paragraph text when passed through `extract_text_from_odt`.
#[test]
fn odt_roundtrip_minimal_text() {
    let expected = "Hello ODT";

    // The temporary directory guard must outlive every use of the path below.
    let workspace = tempdir().expect("create tempdir");
    let odt_path = workspace.path().join("sample.odt");
    write_minimal_odt(&odt_path, expected);

    let actual = extract_text_from_odt(&odt_path).expect("extract text from odt");
    assert_eq!(actual, expected, "ODT extraction should match input body");
}
/// Checks that DOCX extraction emits a newline for a `w:br` element inside a
/// paragraph and a newline between consecutive `w:p` paragraphs.
#[test]
fn docx_parsing_handles_line_breaks_and_paragraphs() {
    // Hand-written document: one paragraph with an explicit break between two
    // runs, followed by a second paragraph.
    let document_xml = r##"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p>
<w:r><w:t>Line 1</w:t></w:r>
<w:r><w:br/></w:r>
<w:r><w:t>Line 2</w:t></w:r>
</w:p>
<w:p><w:r><w:t>Para 2</w:t></w:r></w:p>
</w:body>
</w:document>"##;

    let workspace = tempdir().expect("create tempdir");
    let docx_path = workspace.path().join("sample_breaks.docx");

    // Package the XML as `word/document.xml` inside a Deflate-compressed ZIP.
    let options = SimpleFileOptions::default();
    let options = options.compression_method(CompressionMethod::Deflated);
    let mut archive = ZipWriter::new(File::create(&docx_path).expect("create docx file"));
    archive.add_directory("word", options).expect("add word dir");
    archive
        .start_file("word/document.xml", options)
        .expect("start document.xml");
    archive
        .write_all(document_xml.as_bytes())
        .expect("write document.xml");
    archive.finish().expect("finish docx zip");

    let actual = extract_text_from_docx(&docx_path).expect("extract text");
    assert_eq!(actual, "Line 1\nLine 2\nPara 2");
}
/// Checks that ODT extraction separates consecutive `text:p` paragraphs with
/// a single newline.
#[test]
fn odt_parsing_handles_paragraphs() {
    // Hand-written content with two sibling paragraphs.
    let content_xml = r##"<?xml version="1.0" encoding="UTF-8"?>
<office:document-content
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
office:version="1.2">
<office:body>
<office:text>
<text:p>First paragraph</text:p>
<text:p>Second paragraph</text:p>
</office:text>
</office:body>
</office:document-content>"##;

    let workspace = tempdir().expect("create tempdir");
    let odt_path = workspace.path().join("sample_breaks.odt");

    // Package the XML as `content.xml` inside a Deflate-compressed ZIP.
    let options = SimpleFileOptions::default();
    let options = options.compression_method(CompressionMethod::Deflated);
    let mut archive = ZipWriter::new(File::create(&odt_path).expect("create odt file"));
    archive
        .start_file("content.xml", options)
        .expect("start content.xml");
    archive
        .write_all(content_xml.as_bytes())
        .expect("write content.xml");
    archive.finish().expect("finish odt zip");

    let actual = extract_text_from_odt(&odt_path).expect("extract text");
    assert_eq!(actual, "First paragraph\nSecond paragraph");
}
/// Extracting from a path that does not exist must return an error whose
/// message (case-insensitively) names the failed DOCX open step.
#[test]
fn docx_missing_file_returns_error() {
    let workspace = tempdir().expect("create tempdir");
    // Nothing is ever written to this path.
    let absent_path = workspace.path().join("nope.docx");

    let err = extract_text_from_docx(&absent_path).unwrap_err();

    // Either message wording is accepted.
    let lowered = err.to_lowercase();
    let accepted_fragments = ["open .docx failed", "open .docx zip failed"];
    let recognised = accepted_fragments
        .iter()
        .any(|fragment| lowered.contains(*fragment));
    assert!(recognised, "Unexpected error: {err}");
}
/// Extracting from a path that does not exist must return an error whose
/// message (case-insensitively) names the failed ODT open step.
#[test]
fn odt_missing_file_returns_error() {
    let workspace = tempdir().expect("create tempdir");
    // Nothing is ever written to this path.
    let absent_path = workspace.path().join("nope.odt");

    let err = extract_text_from_odt(&absent_path).unwrap_err();

    // Either message wording is accepted.
    let lowered = err.to_lowercase();
    let accepted_fragments = ["open .odt failed", "open .odt zip failed"];
    let recognised = accepted_fragments
        .iter()
        .any(|fragment| lowered.contains(*fragment));
    assert!(recognised, "Unexpected error: {err}");
}