use std::path::Path;
use std::collections::HashMap;
use crate::iwa::bundle::Bundle;
use crate::iwa::object_index::{ObjectIndex, ResolvedObject};
use crate::iwa::registry::{detect_application, Application};
use crate::iwa::media::{MediaManager, MediaStats};
use crate::iwa::structured::{self, StructuredData};
use crate::iwa::{Error, Result};
/// An opened document: the parsed bundle together with the lookup structures
/// derived from it when the document was loaded.
#[derive(Debug)]
pub struct Document {
/// Parsed bundle, produced by `Bundle::open` or `Bundle::from_bytes`.
bundle: Bundle,
/// Object index built from `bundle` via `ObjectIndex::from_bundle`.
object_index: ObjectIndex,
/// Application detected from the bundle's message types; `Application::Common`
/// when detection yields nothing.
application: Application,
/// Media access for documents opened from a path. `None` when the document was
/// built from bytes or when `MediaManager::new` failed for the path.
media_manager: Option<MediaManager>,
}
impl Document {
    /// Opens the document stored at `path`.
    ///
    /// The bundle is parsed, an object index is built from it and the owning
    /// application is detected from the message types found in its archives.
    /// A `MediaManager` is then created for the same path on a best-effort
    /// basis: if that fails the document still opens, just without media access.
    ///
    /// # Errors
    ///
    /// Propagates any error from `Bundle::open` or `ObjectIndex::from_bundle`.
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
        let path_ref = path.as_ref();
        let bundle = Bundle::open(path_ref)?;
        let object_index = ObjectIndex::from_bundle(&bundle)?;
        let application = Self::detect_bundle_application(&bundle);
        // Deliberately lossy: a missing media manager must not prevent opening.
        let media_manager = MediaManager::new(path_ref).ok();
        Ok(Document {
            bundle,
            object_index,
            application,
            media_manager,
        })
    }

    /// Builds a document from an in-memory bundle.
    ///
    /// Works like [`Document::open`] except that no media manager is created,
    /// so `media_manager()` and `media_stats()` return `None` and
    /// `extract_media()` returns an error for documents built this way.
    ///
    /// # Errors
    ///
    /// Propagates any error from `Bundle::from_bytes` or
    /// `ObjectIndex::from_bundle`.
    pub fn from_bytes(bytes: &[u8]) -> Result<Self> {
        let bundle = Bundle::from_bytes(bytes)?;
        let object_index = ObjectIndex::from_bundle(&bundle)?;
        let application = Self::detect_bundle_application(&bundle);
        Ok(Document {
            bundle,
            object_index,
            application,
            media_manager: None,
        })
    }

    /// Detects the application from every message type in every archive of
    /// `bundle`, falling back to `Application::Common` when detection fails.
    fn detect_bundle_application(bundle: &Bundle) -> Application {
        let message_types: Vec<u32> = bundle
            .archives()
            .values()
            .flat_map(|archive| &archive.objects)
            .flat_map(|obj| &obj.messages)
            .map(|msg| msg.type_)
            .collect();
        detect_application(&message_types).unwrap_or(Application::Common)
    }

    /// Collects the text of every object in every archive, joined with `"\n"`.
    ///
    /// Each object contributes whatever its own `extract_text()` yields. When an
    /// object has no decoded messages, its raw messages are additionally decoded
    /// on the fly; raw messages that fail to decode are skipped.
    ///
    /// The current implementation never returns `Err`.
    pub fn text(&self) -> Result<String> {
        let mut all_text = Vec::new();
        for archive in self.bundle.archives().values() {
            for object in &archive.objects {
                all_text.extend(object.extract_text());
                if object.decoded_messages.is_empty() {
                    for raw_message in &object.messages {
                        // Undecodable messages simply contribute no text.
                        if let Ok(decoded) = self.try_decode_message(raw_message) {
                            all_text.extend(decoded.extract_text());
                        }
                    }
                }
            }
        }
        Ok(all_text.join("\n"))
    }

    /// Decodes a raw message using its type id and payload bytes.
    fn try_decode_message(
        &self,
        raw_message: &crate::iwa::archive::RawMessage,
    ) -> Result<Box<dyn crate::iwa::protobuf::DecodedMessage>> {
        use crate::iwa::protobuf::decode;
        decode(raw_message.type_, &raw_message.data)
    }

    /// Resolves every object id known to the index.
    ///
    /// Ids that fail to resolve, or that resolve to `None`, are silently left
    /// out, so the result may be shorter than the number of indexed ids.
    pub fn objects(&self) -> Vec<ResolvedObject> {
        self.object_index
            .all_object_ids()
            .iter()
            .filter_map(|&id| {
                self.object_index
                    .resolve_object(&self.bundle, id)
                    .ok()
                    .flatten()
            })
            .collect()
    }

    /// Resolves a single object by id, forwarding the index's result unchanged.
    pub fn get_object(&self, id: u64) -> Result<Option<ResolvedObject>> {
        self.object_index.resolve_object(&self.bundle, id)
    }

    /// Returns the application detected when the document was loaded.
    pub fn application(&self) -> Application {
        self.application
    }

    /// Borrows the underlying bundle.
    pub fn bundle(&self) -> &Bundle {
        &self.bundle
    }

    /// Borrows the bundle's metadata.
    pub fn metadata(&self) -> &crate::iwa::bundle::BundleMetadata {
        self.bundle.metadata()
    }

    /// Borrows the media manager, if one is available for this document.
    pub fn media_manager(&self) -> Option<&MediaManager> {
        self.media_manager.as_ref()
    }

    /// Returns media statistics, or `None` when no media manager is available.
    pub fn media_stats(&self) -> Option<MediaStats> {
        self.media_manager.as_ref().map(|m| m.stats())
    }

    /// Extracts the media file named `filename` through the media manager.
    ///
    /// # Errors
    ///
    /// Returns `Error::Bundle` when the document has no media manager;
    /// otherwise forwards the result of `MediaManager::extract`.
    pub fn extract_media(&self, filename: &str) -> Result<Vec<u8>> {
        let manager = self
            .media_manager
            .as_ref()
            .ok_or_else(|| Error::Bundle("Media manager not available".to_string()))?;
        manager.extract(filename)
    }

    /// Runs structured-data extraction over the bundle and object index.
    pub fn extract_structured_data(&self) -> Result<StructuredData> {
        structured::extract_all(&self.bundle, &self.object_index)
    }

    /// Computes summary statistics for the document.
    ///
    /// `total_objects` counts every indexed id, whereas `message_type_counts`
    /// only covers objects returned by [`Document::objects`] (ids that do not
    /// resolve are skipped there).
    pub fn stats(&self) -> DocumentStats {
        let total_objects = self.object_index.all_object_ids().len();
        let archives_count = self.bundle.archives().len();
        let mut message_type_counts = HashMap::new();
        for object in self.objects() {
            for &msg_type in &object.message_types() {
                *message_type_counts.entry(msg_type).or_insert(0) += 1;
            }
        }
        let media_stats = self.media_stats();
        DocumentStats {
            total_objects,
            archives_count,
            message_type_counts,
            application: self.application,
            media_stats,
        }
    }
}
/// Summary statistics for a document, as produced by `Document::stats`.
#[derive(Debug, Clone)]
pub struct DocumentStats {
/// Number of object ids known to the document's object index.
pub total_objects: usize,
/// Number of archives in the document's bundle.
pub archives_count: usize,
/// Occurrence count per message type id, tallied over the objects that
/// `Document::objects` managed to resolve.
pub message_type_counts: HashMap<u32, usize>,
/// Application detected for the document.
pub application: Application,
/// Media statistics; `None` when the document has no media manager.
pub media_stats: Option<MediaStats>,
}
impl DocumentStats {
    /// Returns the message type with the highest count as `(type, count)`.
    ///
    /// When several types share the highest count, the smallest type id wins,
    /// so the result does not depend on `HashMap` iteration order. Returns
    /// `None` when no message types were counted.
    pub fn most_common_message_type(&self) -> Option<(u32, usize)> {
        self.message_type_counts
            .iter()
            .map(|(&type_, &count)| (type_, count))
            // Reverse on the id makes the *smaller* id compare as the maximum
            // among entries with equal counts.
            .max_by_key(|&(type_, count)| (count, std::cmp::Reverse(type_)))
    }

    /// Formats the five most frequent message types as `"type: count"` pairs
    /// separated by `", "`.
    ///
    /// Entries are ordered by descending count, then ascending type id, which
    /// keeps the output stable across runs. When more than five types exist,
    /// `" (and N more)"` is appended with the number of omitted types. An empty
    /// map yields an empty string.
    pub fn message_type_summary(&self) -> String {
        const MAX_LISTED: usize = 5;

        let mut types: Vec<(u32, usize)> = self
            .message_type_counts
            .iter()
            .map(|(&type_, &count)| (type_, count))
            .collect();
        // Keys are unique (type ids are map keys), so an unstable sort is safe.
        types.sort_unstable_by_key(|&(type_, count)| (std::cmp::Reverse(count), type_));

        let top_types: Vec<String> = types
            .iter()
            .take(MAX_LISTED)
            .map(|(type_, count)| format!("{}: {}", type_, count))
            .collect();

        let omitted = types.len() - top_types.len();
        if omitted > 0 {
            format!("{} (and {} more)", top_types.join(", "), omitted)
        } else {
            top_types.join(", ")
        }
    }
}
/// Wrapper around a [`Document`] whose detected application is
/// `Application::Pages`; `PagesDocument::open` rejects anything else.
pub struct PagesDocument(Document);
impl PagesDocument {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
let doc = Document::open(path)?;
if !matches!(doc.application(), Application::Pages) {
return Err(Error::InvalidFormat("Not a Pages document".to_string()));
}
Ok(PagesDocument(doc))
}
pub fn document(&self) -> &Document {
&self.0
}
}
impl std::ops::Deref for PagesDocument {
type Target = Document;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// Wrapper around a [`Document`] whose detected application is
/// `Application::Keynote`; `KeynoteDocument::open` rejects anything else.
pub struct KeynoteDocument(Document);
impl KeynoteDocument {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
let doc = Document::open(path)?;
if !matches!(doc.application(), Application::Keynote) {
return Err(Error::InvalidFormat("Not a Keynote document".to_string()));
}
Ok(KeynoteDocument(doc))
}
pub fn document(&self) -> &Document {
&self.0
}
pub fn slides(&self) -> Vec<KeynoteSlide> {
Vec::new()
}
}
impl std::ops::Deref for KeynoteDocument {
type Target = Document;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// Wrapper around a [`Document`] offering Numbers-oriented accessors.
/// Note that `NumbersDocument::open` does not check the detected application.
pub struct NumbersDocument(Document);
impl NumbersDocument {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
let doc = Document::open(path)?;
Ok(NumbersDocument(doc))
}
pub fn document(&self) -> &Document {
&self.0
}
pub fn sheets(&self) -> Vec<NumbersSheet> {
Vec::new()
}
}
impl std::ops::Deref for NumbersDocument {
type Target = Document;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// One slide, as returned by `KeynoteDocument::slides`.
///
/// NOTE(review): nothing in this file constructs a `KeynoteSlide` yet
/// (`slides` returns an empty vector), so the field meanings below are taken
/// from their names and types only.
#[derive(Debug)]
pub struct KeynoteSlide {
/// Slide title, when one is present.
pub title: Option<String>,
/// Content strings of the slide — presumably one entry per text item; confirm
/// once a producer exists.
pub content: Vec<String>,
}
/// One sheet, as returned by `NumbersDocument::sheets`.
///
/// NOTE(review): nothing in this file constructs a `NumbersSheet` yet
/// (`sheets` returns an empty vector), so the field meanings below are taken
/// from their names and types only.
#[derive(Debug)]
pub struct NumbersSheet {
/// Sheet name, when one is present.
pub name: Option<String>,
/// Tables belonging to the sheet.
pub tables: Vec<NumbersTable>,
}
/// Summary of one table inside a [`NumbersSheet`].
///
/// NOTE(review): nothing in this file constructs a `NumbersTable`; the field
/// meanings below are taken from their names and types only.
#[derive(Debug)]
pub struct NumbersTable {
/// Table name, when one is present.
pub name: Option<String>,
/// Number of rows in the table.
pub row_count: usize,
/// Number of columns in the table.
pub column_count: usize,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Maps a fixture file name to its path, or `None` when the file does not
    /// exist so that the calling test can skip itself.
    fn existing_fixture(name: &str) -> Option<&std::path::Path> {
        let candidate = std::path::Path::new(name);
        if candidate.exists() {
            Some(candidate)
        } else {
            None
        }
    }

    /// The statistics helpers work on a hand-built `DocumentStats`.
    #[test]
    fn test_document_stats() {
        let message_type_counts: HashMap<u32, usize> =
            vec![(1, 10), (2, 5), (3, 15)].into_iter().collect();
        let stats = DocumentStats {
            total_objects: 25,
            archives_count: 3,
            message_type_counts,
            application: Application::Pages,
            media_stats: None,
        };

        assert_eq!(stats.total_objects, 25);
        assert_eq!(stats.archives_count, 3);
        assert_eq!(stats.most_common_message_type(), Some((3, 15)));

        let summary = stats.message_type_summary();
        assert!(summary.contains("3: 15"));
        assert!(summary.contains("1: 10"));
    }

    /// Detection yields a result for non-empty type lists and `None` for an
    /// empty one.
    #[test]
    fn test_application_detection() {
        let keynote_types = vec![101, 102, 103];
        assert!(detect_application(&keynote_types).is_some());

        let mixed_types = vec![1, 1, 1, 101];
        assert!(detect_application(&mixed_types).is_some());

        assert_eq!(detect_application(&[]), None);
    }

    /// Opens the Pages fixture generically; skipped when the fixture is absent.
    #[test]
    fn test_pages_document_parsing() {
        let doc_path = match existing_fixture("test.pages") {
            Some(path) => path,
            None => return,
        };

        let doc = match Document::open(doc_path) {
            Ok(doc) => doc,
            // `Some(..)` keeps the failure text in `Option` debug form.
            Err(err) => panic!("Failed to open Pages document: {:?}", Some(err)),
        };
        assert!(matches!(doc.application(), Application::Pages | Application::Common));

        let objects = doc.objects();
        assert!(!objects.is_empty(), "Document should contain objects");

        let stats = doc.stats();
        assert!(stats.total_objects > 0, "Document should have objects");

        assert!(doc.text().is_ok());
    }

    /// Opens the Numbers fixture both generically and through
    /// `NumbersDocument`; skipped when the fixture is absent.
    #[test]
    fn test_numbers_document_parsing() {
        let doc_path = match existing_fixture("test.numbers") {
            Some(path) => path,
            None => return,
        };

        let doc = match Document::open(doc_path) {
            Ok(doc) => doc,
            // `Some(..)` keeps the failure text in `Option` debug form.
            Err(err) => panic!("Failed to open Numbers document: {:?}", Some(err)),
        };
        assert!(matches!(doc.application(), Application::Numbers | Application::Common | Application::Pages));

        let objects = doc.objects();
        assert!(!objects.is_empty(), "Document should contain objects");

        let numbers_doc = match NumbersDocument::open(doc_path) {
            Ok(numbers_doc) => numbers_doc,
            Err(_) => panic!("Failed to open as NumbersDocument"),
        };
        let app = numbers_doc.application();
        assert!(matches!(app, Application::Numbers | Application::Common | Application::Pages),
            "Expected Numbers, Common, or Pages application, got {:?}", app);
    }

    /// Exercises the `PagesDocument` wrapper; skipped when the fixture is
    /// absent or is not accepted as a Pages document.
    #[test]
    fn test_pages_document_interface() {
        let doc_path = match existing_fixture("test.pages") {
            Some(path) => path,
            None => return,
        };

        let pages_doc = match PagesDocument::open(doc_path) {
            Ok(pages_doc) => pages_doc,
            Err(_) => return,
        };
        assert!(matches!(pages_doc.application(), Application::Pages | Application::Common));

        let doc = pages_doc.document();
        assert!(matches!(doc.application(), Application::Pages | Application::Common));
    }
}