use crate::ole::consts::PptRecordType;
use crate::ole::ppt::package::{PptError, Result};
use crate::ole::ppt::text::extractor::{parse_text_chars_atom, parse_text_bytes_atom, parse_cstring};
use super::{DocumentInfo, SlideInfo, SlideAtomsSet};
use zerocopy::{byteorder::{U16, U32, LittleEndian}, FromBytes};
/// One record from a PowerPoint binary stream: the decoded 8-byte header, a
/// copy of the payload, and (for container types) the parsed child records.
#[derive(Debug, Clone)]
pub struct PptRecord {
/// Record type decoded from `record_type_raw` via `PptRecordType::from`.
pub record_type: PptRecordType,
/// Raw 16-bit record type exactly as read from the header.
pub record_type_raw: u16,
/// Low 4 bits of the first header word.
pub version: u16,
/// Upper 12 bits of the first header word.
pub instance: u16,
/// Payload size in bytes. `parse` stores the number of bytes actually
/// captured, which can be smaller than the length declared in the header
/// when the input is truncated.
pub data_length: u32,
/// Owned copy of the payload bytes that follow the header.
pub data: Vec<u8>,
/// Child records parsed out of `data` for container record types; left
/// empty by `parse` for every other type.
pub children: Vec<PptRecord>,
}
impl PptRecord {
/// Parses one PPT record (8-byte header plus payload) starting at `offset`
/// within `data`.
///
/// Header layout, all little-endian: bytes 0..2 carry the version (low 4 bits)
/// and instance (upper 12 bits), bytes 2..4 the record type, and bytes 4..8 the
/// declared payload length.
///
/// A payload that claims more bytes than remain in `data` is truncated to what
/// is available instead of being rejected; `data_length` on the returned record
/// is the truncated size. For container record types the payload is also
/// parsed into `children`.
///
/// Returns the record together with the number of bytes consumed
/// (header + captured payload).
///
/// # Errors
///
/// Returns `PptError::Corrupted` when fewer than 8 bytes remain at `offset`
/// (including an `offset` so large that `offset + 8` would overflow), or when
/// the header declares a non-empty payload but no bytes follow the header.
pub fn parse(data: &[u8], offset: usize) -> Result<(Self, usize)> {
    // `checked_add` so that a huge caller-supplied offset produces an error
    // rather than an arithmetic overflow or an out-of-range slice panic.
    let payload_start = match offset.checked_add(8) {
        Some(end) if end <= data.len() => end,
        _ => {
            return Err(PptError::Corrupted(
                "Not enough data for PPT record header".to_string(),
            ));
        }
    };
    let header = &data[offset..payload_start];

    // Each slice below has exactly the width being read, so the fallback
    // value is never expected to be used.
    let version_instance = U16::<LittleEndian>::read_from_bytes(&header[0..2])
        .map(|v| v.get())
        .unwrap_or(0);
    let record_type = U16::<LittleEndian>::read_from_bytes(&header[2..4])
        .map(|v| v.get())
        .unwrap_or(0);
    let data_length = U32::<LittleEndian>::read_from_bytes(&header[4..8])
        .map(|v| v.get())
        .unwrap_or(0);

    let version = version_instance & 0x000F;
    let instance = (version_instance >> 4) & 0x0FFF;
    let record_type_enum = PptRecordType::from(record_type);

    // Truncated payloads are tolerated (best-effort parsing); the only hard
    // failure is a header that promises data when nothing at all follows it.
    let available_data_size = data.len() - payload_start;
    let declared_size = data_length as usize;
    if declared_size > 0 && available_data_size == 0 {
        return Err(PptError::Corrupted(
            "Record extends beyond data bounds and no data available".to_string(),
        ));
    }

    let actual_data_size = available_data_size.min(declared_size);
    let payload = &data[payload_start..payload_start + actual_data_size];

    // NOTE(review): nested containers recurse through
    // `parse_container_children` with no depth limit other than input size.
    let children = if Self::is_container_record(record_type_enum) && actual_data_size > 0 {
        Self::parse_container_children(payload)?
    } else {
        Vec::new()
    };

    let record = PptRecord {
        record_type: record_type_enum,
        record_type_raw: record_type,
        version,
        instance,
        // Lossless: `actual_data_size` never exceeds the u32 `data_length`.
        data_length: actual_data_size as u32,
        data: payload.to_vec(),
        children,
    };
    Ok((record, 8 + actual_data_size))
}
/// Reports whether `record_type` belongs to the fixed set of record types whose
/// payload `parse` descends into to collect child records.
///
/// NOTE(review): this is a hard-coded allow-list. [MS-PPT] marks containers
/// through the header version nibble (0xF) — confirm the list (for example
/// `PersistPtrHolder`) matches the intended set.
fn is_container_record(record_type: PptRecordType) -> bool {
    const CONTAINER_TYPES: [PptRecordType; 12] = [
        PptRecordType::Document,
        PptRecordType::Slide,
        PptRecordType::Notes,
        PptRecordType::MainMaster,
        PptRecordType::HeadersFooters,
        PptRecordType::ExObjList,
        PptRecordType::VBAInfo,
        PptRecordType::SlideListWithText,
        PptRecordType::PersistPtrHolder,
        PptRecordType::Environment,
        PptRecordType::InteractiveInfo,
        PptRecordType::AnimationInfo,
    ];
    CONTAINER_TYPES.contains(&record_type)
}
/// Parses the payload of a container record into its sequence of child records.
///
/// Records are read back to back for as long as at least 8 bytes (one header)
/// remain. When a record fails to parse, the cursor moves forward by a single
/// byte and parsing is retried from there, so damaged regions are skipped
/// rather than aborting the whole container. In its current form this function
/// always returns `Ok`.
fn parse_container_children(data: &[u8]) -> Result<Vec<PptRecord>> {
    let mut records = Vec::new();
    let mut cursor = 0;
    while cursor + 8 <= data.len() {
        if let Ok((record, advance)) = Self::parse(data, cursor) {
            records.push(record);
            // Guard against a parser that makes no progress.
            if advance == 0 {
                break;
            }
            cursor += advance;
        } else {
            // Resynchronise one byte further on; the loop condition stops us
            // once a full header no longer fits.
            cursor += 1;
        }
    }
    Ok(records)
}
/// Returns the first direct child whose record type equals `record_type`,
/// or `None` when no direct child matches. Grandchildren are not searched.
pub fn find_child(&self, record_type: PptRecordType) -> Option<&PptRecord> {
    for candidate in &self.children {
        if candidate.record_type == record_type {
            return Some(candidate);
        }
    }
    None
}
/// Collects, in their original order, every direct child whose record type
/// equals `record_type`. Grandchildren are not searched.
pub fn find_children(&self, record_type: PptRecordType) -> Vec<&PptRecord> {
    let mut matching = Vec::new();
    for candidate in &self.children {
        if candidate.record_type == record_type {
            matching.push(candidate);
        }
    }
    matching
}
/// Returns a copy of the drawing payload associated with this record, if one
/// can be located.
///
/// Lookup order:
/// 1. a direct `PPDrawing` child — its payload is returned;
/// 2. for `Slide` records only: the slide's own payload, provided it is longer
///    than 8 bytes and the first record header embedded in it carries a record
///    type of `0xF000` or above (presumably the drawing-record range — confirm).
///
/// Returns `None` when neither applies.
pub fn extract_slide_data(&self) -> Option<Vec<u8>> {
    if let Some(ppdrawing) = self.find_child(PptRecordType::PPDrawing) {
        return Some(ppdrawing.data.clone());
    }
    if self.record_type != PptRecordType::Slide || self.data.len() <= 8 {
        return None;
    }
    // The record type occupies bytes 2..4 of a record header; bytes 0..2 are
    // the version/instance word (the same layout `parse` decodes).
    let first_record_type = U16::<LittleEndian>::read_from_bytes(&self.data[2..4])
        .map(|v| v.get())
        .unwrap_or(0);
    if first_record_type >= 0xF000 {
        Some(self.data.clone())
    } else {
        None
    }
}
/// Builds a `DocumentInfo` summary for a `Document` record.
///
/// Returns `None` for any other record type. The summary starts from the
/// direct `DocumentAtom` child when present (defaults otherwise), and the
/// `has_environment` / `has_drawing_group` flags are switched on when a direct
/// `Environment` / `PPDrawingGroup` child exists.
pub fn extract_document_info(&self) -> Option<DocumentInfo> {
    if self.record_type != PptRecordType::Document {
        return None;
    }
    let mut summary = match self.find_child(PptRecordType::DocumentAtom) {
        Some(atom) => Self::parse_document_atom(atom),
        None => DocumentInfo::default(),
    };
    // `|=` only ever turns a flag on, leaving its prior value intact otherwise.
    summary.has_environment |= self.find_child(PptRecordType::Environment).is_some();
    summary.has_drawing_group |= self.find_child(PptRecordType::PPDrawingGroup).is_some();
    Some(summary)
}
/// Decodes five consecutive little-endian 32-bit fields from the start of
/// `record`'s payload into a `DocumentInfo`: slide width, slide height, slide
/// count, notes count and master count, in that order.
///
/// Payloads shorter than 20 bytes yield `DocumentInfo::default()`.
///
/// NOTE(review): [MS-PPT] describes DocumentAtom as beginning with slide size,
/// notes size and a zoom ratio — confirm that offsets 8..20 really hold the
/// three counts read here.
fn parse_document_atom(record: &PptRecord) -> DocumentInfo {
    let mut info = DocumentInfo::default();
    let payload = &record.data;
    if payload.len() < 20 {
        return info;
    }
    // Reads the little-endian u32 that begins at byte `start`.
    let word = |start: usize| -> u32 {
        U32::<LittleEndian>::read_from_bytes(&payload[start..start + 4])
            .map(|v| v.get())
            .unwrap_or(0)
    };
    info.slide_width = word(0);
    info.slide_height = word(4);
    info.slide_count = word(8) as usize;
    info.notes_count = word(12) as usize;
    info.master_count = word(16) as usize;
    info
}
/// Builds a `SlideInfo` summary for a `Slide` record.
///
/// Returns `None` for any other record type. The summary starts from the
/// direct `SlideAtom` child when present (defaults otherwise), and the
/// `has_drawing` / `has_notes` flags are switched on when a direct
/// `PPDrawing` / `Notes` child exists.
pub fn extract_slide_info(&self) -> Option<SlideInfo> {
    if self.record_type != PptRecordType::Slide {
        return None;
    }
    let mut summary = match self.find_child(PptRecordType::SlideAtom) {
        Some(atom) => Self::parse_slide_atom(atom),
        None => SlideInfo::default(),
    };
    // `|=` only ever turns a flag on, leaving its prior value intact otherwise.
    summary.has_drawing |= self.find_child(PptRecordType::PPDrawing).is_some();
    summary.has_notes |= self.find_child(PptRecordType::Notes).is_some();
    Some(summary)
}
/// Decodes three consecutive little-endian 32-bit fields from the start of
/// `record`'s payload into a `SlideInfo`: layout id, master id and notes id,
/// in that order.
///
/// Payloads shorter than 12 bytes yield `SlideInfo::default()`.
///
/// NOTE(review): [MS-PPT] places an 8-byte placeholder-type array between the
/// layout geometry and the master/notes references in SlideAtom — confirm that
/// offsets 4..12 are the intended source for `master_id` and `notes_id`.
fn parse_slide_atom(record: &PptRecord) -> SlideInfo {
    let mut info = SlideInfo::default();
    let payload = &record.data;
    if payload.len() < 12 {
        return info;
    }
    // Reads the little-endian u32 that begins at byte `start`.
    let word = |start: usize| -> u32 {
        U32::<LittleEndian>::read_from_bytes(&payload[start..start + 4])
            .map(|v| v.get())
            .unwrap_or(0)
    };
    info.layout_id = word(0);
    info.master_id = word(4);
    info.notes_id = word(8);
    info
}
/// Gathers the text carried by this record and, recursively, by all of its
/// descendants, joined with `"\n"`.
///
/// This record contributes text only when it is a `TextCharsAtom`,
/// `TextBytesAtom` or `CString` whose payload decodes successfully; decode
/// failures are silently skipped. Children follow in order, and a child whose
/// extracted text is empty contributes nothing. In its current form this
/// function always returns `Ok`.
pub fn extract_text(&self) -> Result<String> {
    // Text decoded from this record's own payload, if it is a text record.
    let own_text = match self.record_type {
        PptRecordType::TextCharsAtom => parse_text_chars_atom(&self.data).ok(),
        PptRecordType::TextBytesAtom => parse_text_bytes_atom(&self.data).ok(),
        PptRecordType::CString => parse_cstring(&self.data).ok(),
        _ => None,
    };
    let descendant_text = self
        .children
        .iter()
        .filter_map(|child| child.extract_text().ok())
        .filter(|text| !text.is_empty());
    let pieces: Vec<String> = own_text.into_iter().chain(descendant_text).collect();
    Ok(pieces.join("\n"))
}
/// Returns every direct `SlideListWithText` child of a `Document` record, in
/// order. Any other record type yields an empty vector.
pub fn extract_slide_list_with_texts(&self) -> Vec<&PptRecord> {
    if self.record_type == PptRecordType::Document {
        self.find_children(PptRecordType::SlideListWithText)
    } else {
        Vec::new()
    }
}
pub fn get_instance(&self) -> u16 {
self.instance
}
/// Splits the children of a `SlideListWithText` record into one group per
/// `SlidePersistAtom`.
///
/// Every `SlidePersistAtom` child opens a new `SlideAtomsSet`; the children
/// that follow it, up to the next `SlidePersistAtom`, become that set's
/// `slide_records`. Children that appear before the first `SlidePersistAtom`
/// are dropped. Any other record type yields an empty vector.
pub fn group_into_slide_atoms_sets<'a>(&'a self) -> Vec<SlideAtomsSet<'a>> {
    let mut sets: Vec<SlideAtomsSet<'a>> = Vec::new();
    if self.record_type != PptRecordType::SlideListWithText {
        return sets;
    }
    for child in &self.children {
        if child.record_type == PptRecordType::SlidePersistAtom {
            sets.push(SlideAtomsSet {
                slide_persist_atom: child,
                slide_records: Vec::new(),
            });
        } else if let Some(open_set) = sets.last_mut() {
            // Belongs to the most recently opened set.
            open_set.slide_records.push(child);
        }
        // Otherwise no set is open yet, so the record is skipped.
    }
    sets
}
/// Returns the little-endian `u32` stored in the first four payload bytes of a
/// `SlidePersistAtom` record.
///
/// Returns `None` for any other record type or when the payload is shorter
/// than four bytes.
///
/// NOTE(review): [MS-PPT] lists the persist reference first and the slide
/// identifier at byte offset 12 of SlidePersistAtom — confirm that offset 0 is
/// the value callers expect here.
pub fn get_slide_id(&self) -> Option<u32> {
    if self.record_type != PptRecordType::SlidePersistAtom {
        return None;
    }
    let id_bytes = self.data.get(0..4)?;
    let slide_id = U32::<LittleEndian>::read_from_bytes(id_bytes)
        .map(|v| v.get())
        .unwrap_or(0);
    Some(slide_id)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A record assembled by hand exposes its fields exactly as supplied.
    #[test]
    fn test_record_creation() {
        // Sixteen payload bytes: 1, 2, ..., 16.
        let payload: Vec<u8> = (1..=16).collect();
        let record = PptRecord {
            record_type: PptRecordType::Document,
            record_type_raw: 1000,
            version: 1,
            instance: 0,
            data_length: 16,
            data: payload,
            children: Vec::new(),
        };

        assert_eq!(record.record_type, PptRecordType::Document);
        assert_eq!(record.version, 1);
        assert_eq!(record.data_length, 16);
        assert_eq!(record.data.len(), 16);
    }
}