mod common;
use std::io::Read;
use std::path::Path;
use std::process::{Command, Stdio};
use common::dbmd;
/// Computes the CRC-32 checksum of `data` using the reflected polynomial
/// `0xEDB8_8320`, an all-ones initial register and a final bit inversion.
///
/// The register is updated one bit at a time, so no lookup table is needed.
fn crc32(data: &[u8]) -> u32 {
    const REFLECTED_POLY: u32 = 0xEDB8_8320;

    let register = data.iter().fold(0xFFFF_FFFF_u32, |acc, &byte| {
        // Mix the next byte into the low bits, then shift out eight bits,
        // applying the polynomial whenever the bit shifted out was set.
        (0..8).fold(acc ^ u32::from(byte), |reg, _| {
            let shifted = reg >> 1;
            if reg & 1 == 1 {
                shifted ^ REFLECTED_POLY
            } else {
                shifted
            }
        })
    });

    !register
}
fn write_stored_zip(dest: &Path, entries: &[(&str, &[u8])]) {
fn u16le(v: u16) -> [u8; 2] {
v.to_le_bytes()
}
fn u32le(v: u32) -> [u8; 4] {
v.to_le_bytes()
}
let mut out = Vec::new();
let mut central = Vec::new();
for (name, data) in entries {
let name = name.as_bytes();
let crc = crc32(data);
let off = out.len() as u32;
out.extend_from_slice(b"PK\x03\x04");
out.extend_from_slice(&u16le(20)); out.extend_from_slice(&u16le(0)); out.extend_from_slice(&u16le(0)); out.extend_from_slice(&u16le(0)); out.extend_from_slice(&u16le(0)); out.extend_from_slice(&u32le(crc));
out.extend_from_slice(&u32le(data.len() as u32)); out.extend_from_slice(&u32le(data.len() as u32)); out.extend_from_slice(&u16le(name.len() as u16));
out.extend_from_slice(&u16le(0)); out.extend_from_slice(name);
out.extend_from_slice(data);
central.extend_from_slice(b"PK\x01\x02");
central.extend_from_slice(&u16le(20)); central.extend_from_slice(&u16le(20)); central.extend_from_slice(&u16le(0)); central.extend_from_slice(&u16le(0)); central.extend_from_slice(&u16le(0)); central.extend_from_slice(&u16le(0)); central.extend_from_slice(&u32le(crc));
central.extend_from_slice(&u32le(data.len() as u32));
central.extend_from_slice(&u32le(data.len() as u32));
central.extend_from_slice(&u16le(name.len() as u16));
central.extend_from_slice(&u16le(0)); central.extend_from_slice(&u16le(0)); central.extend_from_slice(&u16le(0)); central.extend_from_slice(&u16le(0)); central.extend_from_slice(&u32le(0)); central.extend_from_slice(&u32le(off)); central.extend_from_slice(name);
}
let cd_offset = out.len() as u32;
out.extend_from_slice(¢ral);
out.extend_from_slice(b"PK\x05\x06");
out.extend_from_slice(&u16le(0)); out.extend_from_slice(&u16le(0)); out.extend_from_slice(&u16le(entries.len() as u16)); out.extend_from_slice(&u16le(entries.len() as u16)); out.extend_from_slice(&u32le(central.len() as u32)); out.extend_from_slice(&u32le(cd_offset)); out.extend_from_slice(&u16le(0));
std::fs::write(dest, out).unwrap();
}
fn write_dense_bomb_xlsx(dest: &Path) {
let content_types = br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
<Default Extension="xml" ContentType="application/xml"/>
<Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
<Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>
</Types>"#;
let root_rels = br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/>
</Relationships>"#;
let workbook = br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<sheets><sheet name="Sheet1" sheetId="1" r:id="rId1"/></sheets>
</workbook>"#;
let workbook_rels = br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
</Relationships>"#;
let bomb_sheet = br#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
<sheetData>
<row r="1"><c r="A1"><v>1</v></c></row>
<row r="1048576"><c r="XFD1048576"><v>2</v></c></row>
</sheetData></worksheet>"#;
write_stored_zip(
dest,
&[
("[Content_Types].xml", content_types),
("_rels/.rels", root_rels),
("xl/workbook.xml", workbook),
("xl/_rels/workbook.xml.rels", workbook_rels),
("xl/worksheets/sheet1.xml", bomb_sheet),
],
);
}
/// Runs `dbmd extract` on the workbook produced by `write_dense_bomb_xlsx`
/// and checks that it fails with exit code 1 and an empty stdout, and that
/// with `--json` the stderr output parses as a JSON object whose
/// `error.code` is `EXTRACT_PARSE_ERROR`.
#[test]
fn dense_grid_bomb_xlsx_refuses_cleanly_not_oom() {
    let scratch = tempfile::TempDir::new().unwrap();
    let xlsx_path = scratch.path().join("invoice.xlsx");
    write_dense_bomb_xlsx(&xlsx_path);

    // The fixture itself has to stay small on disk.
    let size_on_disk = std::fs::metadata(&xlsx_path).unwrap().len();
    assert!(size_on_disk < 10_000, "the bomb must be tiny on disk");

    // Plain invocation: must fail with code 1 and print nothing on stdout.
    let plain_run = dbmd()
        .arg("extract")
        .arg(&xlsx_path)
        .assert()
        .failure()
        .code(1);
    let stdout = String::from_utf8(plain_run.get_output().stdout.clone()).unwrap();
    assert!(
        stdout.is_empty(),
        "an over-cap spreadsheet must emit nothing to stdout, got: {stdout:?}"
    );

    // JSON invocation: same exit code, structured error on stderr.
    let json_run = dbmd()
        .arg("--json")
        .arg("extract")
        .arg(&xlsx_path)
        .assert()
        .failure()
        .code(1);
    let stderr_text = String::from_utf8(json_run.get_output().stderr.clone()).unwrap();
    let envelope: serde_json::Value =
        serde_json::from_str(stderr_text.trim()).expect("JSON error object on stderr");
    assert_eq!(envelope["error"]["code"], "EXTRACT_PARSE_ERROR");
}
/// Writes an HTML document with 40,000 numbered `<p>` paragraphs to `dest`.
///
/// The whole page is assembled in memory (a 2 MiB buffer is reserved up
/// front) and written with a single `std::fs::write` call.
///
/// # Panics
///
/// Panics if `dest` cannot be written.
fn write_large_html(dest: &Path) {
    const RESERVED_BYTES: usize = 2 * 1024 * 1024;

    let mut page = String::with_capacity(RESERVED_BYTES);
    page += "<html><body>";
    (0..40_000).for_each(|i| {
        page += &format!("<p>line number {i} with some filler words here</p>");
    });
    page += "</body></html>";

    std::fs::write(dest, page).unwrap();
}
/// Spawns `dbmd extract` on the page from `write_large_html` with piped
/// stdout/stderr, reads at most 64 bytes of its stdout, closes the pipe, and
/// then requires the process to exit successfully with an empty stderr.
#[test]
fn broken_pipe_downstream_exits_zero_not_io_error() {
    let scratch = tempfile::TempDir::new().unwrap();
    let html_path = scratch.path().join("big.html");
    write_large_html(&html_path);

    let exe = assert_cmd::cargo::cargo_bin("dbmd");
    let mut child = Command::new(exe)
        .arg("extract")
        .arg(&html_path)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .expect("spawn dbmd extract");

    // Take a small read from the pipe, then drop our end so that the child's
    // subsequent writes have no reader.
    let mut reader = child.stdout.take().expect("piped stdout");
    let mut first_chunk = [0u8; 64];
    let _ = reader.read(&mut first_chunk);
    drop(reader);

    let output = child.wait_with_output().expect("wait for dbmd");
    let stderr_text = String::from_utf8_lossy(&output.stderr);

    assert!(
        output.status.success(),
        "broken pipe must exit 0, got status {:?} with stderr: {}",
        output.status.code(),
        stderr_text
    );
    assert!(
        output.stderr.is_empty(),
        "broken pipe must not emit an error envelope, got stderr: {}",
        stderr_text
    );
}