pub use crate::cfb::blip::BlipFormat as ImageFormat;
pub use crate::cfb::blip::BlipImage as DocImage;
/// Scans `data` byte by byte for embedded blip records and returns every
/// image whose payload passes the signature check.
///
/// Each candidate is read as an 8-byte little-endian record header: a 16-bit
/// version/instance word (the instance is the upper 12 bits), a 16-bit record
/// type, and a 32-bit payload length. This looks like the OfficeArt record
/// header layout — NOTE(review): confirm against MS-ODRAW.
///
/// For a candidate whose type is accepted by `is_blip_type`:
/// * the payload end is clamped to the end of `data`, so a truncated record
///   is still considered;
/// * the UID bytes and (for metafile types) the metafile header are skipped
///   before the image bytes start;
/// * the scan then resumes at the end of the record, whether or not the
///   image was accepted.
///
/// Positions that do not carry a blip record type advance the scan by one
/// byte. Returned images are numbered in the order they were found.
pub fn extract_images(data: &[u8]) -> Vec<DocImage> {
    /// Size of the record header read at every candidate position.
    const HEADER_LEN: usize = 8;

    let mut images = Vec::new();
    let mut pos = 0usize;

    while pos + HEADER_LEN <= data.len() {
        let header = &data[pos..pos + HEADER_LEN];
        let rec_type = u16::from_le_bytes([header[2], header[3]]);

        if !is_blip_type(rec_type) {
            pos += 1;
            continue;
        }

        let inst = u16::from_le_bytes([header[0], header[1]]) >> 4;
        let rec_len =
            u32::from_le_bytes([header[4], header[5], header[6], header[7]]) as usize;

        let data_start = pos + HEADER_LEN;
        // `rec_len` comes straight from the input. A plain `+` could overflow
        // `usize` on 32-bit targets (panic in debug, wrap-around in release,
        // which could even move the scan position backwards). Saturate first,
        // then clamp to the buffer.
        let data_end = data_start.saturating_add(rec_len).min(data.len());

        let skip = uid_size(rec_type, inst) + metafile_header_size(rec_type);
        let img_start = data_start + skip;

        if img_start < data_end {
            let img_data = &data[img_start..data_end];
            if has_valid_signature(rec_type, img_data) {
                let index = images.len();
                images.push(DocImage {
                    format: ImageFormat::from_record_type(rec_type),
                    data: img_data.to_vec(),
                    index,
                });
            }
        }

        // `data_end >= data_start > pos`, so the scan always makes progress.
        pos = data_end;
    }

    images
}
/// Returns `true` when `rt` is one of the record types treated as a blip:
/// the contiguous range `0xF01A..=0xF01F`, or `0xF029` / `0xF02A`.
fn is_blip_type(rt: u16) -> bool {
    (0xF01A..=0xF01F).contains(&rt) || rt == 0xF029 || rt == 0xF02A
}
/// Number of bytes to skip for the UID area at the start of a blip payload.
///
/// Record types `0xF01A..=0xF01C` use 16 bytes; every other type uses 17.
/// When the lowest bit of `inst` is set, another 16 bytes are added.
///
/// NOTE(review): presumably the extra byte is the tag byte of bitmap blips
/// and an odd instance signals a second UID — confirm against MS-ODRAW.
fn uid_size(rec_type: u16, inst: u16) -> usize {
    let primary = if (0xF01A..=0xF01C).contains(&rec_type) {
        16
    } else {
        17
    };
    let secondary = if inst & 1 == 0 { 0 } else { 16 };
    primary + secondary
}
/// Size in bytes of the extra header that precedes the image bytes:
/// 34 for record types `0xF01A..=0xF01C`, 0 for every other type.
fn metafile_header_size(rec_type: u16) -> usize {
    if (0xF01A..=0xF01C).contains(&rec_type) {
        34
    } else {
        0
    }
}
/// Cheap plausibility check on the image bytes of a blip record.
///
/// * `0xF01D` / `0xF02A`: must begin with `FF D8`.
/// * `0xF01E`: must begin with `\x89PNG`.
/// * `0xF01A`: must begin with `01 00 00 00`.
/// * every other type: only needs to be longer than 10 bytes.
///
/// Empty input is rejected for every type, because no prefix can match it
/// and its length is not greater than 10.
fn has_valid_signature(rec_type: u16, data: &[u8]) -> bool {
    match rec_type {
        0xF01D | 0xF02A => data.starts_with(&[0xFF_u8, 0xD8]),
        0xF01E => data.starts_with(b"\x89PNG"),
        0xF01A => data.starts_with(&[0x01_u8, 0x00, 0x00, 0x00]),
        // No magic number is checked for the remaining types; a minimum
        // length is the only filter.
        _ => data.len() > 10,
    }
}
/// Extension trait for building a format value from a raw 16-bit record
/// type code.
trait BlipFormatExt {
    /// Returns the value of `Self` that corresponds to `record_type`.
    fn from_record_type(record_type: u16) -> Self;
}
impl BlipFormatExt for ImageFormat {
    /// Maps a blip record type code to its image format. Codes without a
    /// dedicated variant are preserved inside `Unknown`.
    fn from_record_type(rt: u16) -> Self {
        match rt {
            // Types with strict signature checks elsewhere in this file.
            0xF01D | 0xF02A => Self::Jpeg,
            0xF01E => Self::Png,
            0xF01A => Self::Emf,
            // Remaining dedicated variants.
            0xF01B => Self::Wmf,
            0xF01C => Self::Pict,
            0xF01F => Self::Dib,
            0xF029 => Self::Tiff,
            _ => Self::Unknown(rt),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a synthetic stream: 100 zero bytes, one blip record (header,
    /// zeroed UID area, zeroed metafile header when the type has one, then
    /// `img_data`), followed by 50 zero bytes.
    fn make_blip_in_data(rec_type: u16, inst: u16, img_data: &[u8]) -> Vec<u8> {
        let uid_len = uid_size(rec_type, inst);
        let header_len = metafile_header_size(rec_type);
        let payload_len = uid_len + header_len + img_data.len();

        // Leading padding so the record does not start at offset 0.
        let mut stream = vec![0u8; 100];

        // Record header: version/instance word, record type, payload length.
        stream.extend_from_slice(&(inst << 4).to_le_bytes());
        stream.extend_from_slice(&rec_type.to_le_bytes());
        stream.extend_from_slice(&(payload_len as u32).to_le_bytes());

        // Zero-filled UID area and metafile header.
        stream.resize(stream.len() + uid_len + header_len, 0);
        stream.extend_from_slice(img_data);

        // Trailing padding after the record.
        stream.resize(stream.len() + 50, 0);
        stream
    }

    #[test]
    fn scan_finds_jpeg_in_data_stream() {
        let stream = make_blip_in_data(0xF01D, 0x46A, b"\xff\xd8\xff\xe0JFIF");
        let found = extract_images(&stream);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].format, ImageFormat::Jpeg);
        assert!(found[0].data.starts_with(b"\xff\xd8"));
    }

    #[test]
    fn scan_finds_png_in_data_stream() {
        let stream = make_blip_in_data(0xF01E, 0x6E0, b"\x89PNG\r\n\x1a\nIHDR");
        let found = extract_images(&stream);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].format, ImageFormat::Png);
        assert!(found[0].data.starts_with(b"\x89PNG"));
    }

    #[test]
    fn scan_finds_multiple_images() {
        let mut stream = make_blip_in_data(0xF01D, 0x46A, b"\xff\xd8\xff\xe0JPEG1");
        stream.extend(make_blip_in_data(0xF01E, 0x6E0, b"\x89PNG\r\n\x1a\nPNG2"));
        let found = extract_images(&stream);
        assert_eq!(found.len(), 2);
        assert_eq!(found[0].format, ImageFormat::Jpeg);
        assert_eq!(found[1].format, ImageFormat::Png);
    }

    /// A header carrying a blip record type (0xF01D, length 30) whose payload
    /// is all zeros must not be reported as an image.
    #[test]
    fn rejects_false_positive() {
        let mut stream = vec![0u8; 100];
        // Bytes 2..4: record type 0xF01D (little-endian); bytes 4..8: length 30.
        stream[2..8].copy_from_slice(&[0x1D, 0xF0, 30, 0, 0, 0]);
        let found = extract_images(&stream);
        assert!(found.is_empty());
    }

    #[test]
    fn empty_data_stream() {
        assert!(extract_images(&[]).is_empty());
    }
}