litchi 0.0.1

High-performance parser for Microsoft Office, OpenDocument, and Apple iWork file formats with unified API
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
/// Magic bytes that should be at the beginning of every OLE file
pub const MAGIC: &[u8; 8] = b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1";

/// Minimal size of an empty OLE file with 512-byte sectors
/// (1536 bytes, i.e. 3 * 512)
pub const MINIMAL_OLEFILE_SIZE: usize = 1536;

/// Size of a directory entry in bytes
pub const DIRENTRY_SIZE: usize = 128;

/// Default sector size for version 3 (512 bytes)
pub const SECTOR_SIZE_V3: usize = 512;

/// Default sector size for version 4 (4096 bytes)
pub const SECTOR_SIZE_V4: usize = 4096;

// Sector IDs (from AAF specifications)
// The trailing `// -N` notes give the same bit pattern read as a signed
// 32-bit integer.
/// Maximum regular sector ID
pub const MAXREGSECT: u32 = 0xFFFFFFFA; // -6
/// Denotes a DIFAT sector in a FAT
pub const DIFSECT: u32 = 0xFFFFFFFC; // -4
/// Denotes a FAT sector in a FAT
pub const FATSECT: u32 = 0xFFFFFFFD; // -3
/// End of a virtual stream chain
pub const ENDOFCHAIN: u32 = 0xFFFFFFFE; // -2
/// Unallocated sector
pub const FREESECT: u32 = 0xFFFFFFFF; // -1

// Directory Entry IDs (from AAF specifications)
/// Maximum directory entry ID
pub const MAXREGSID: u32 = 0xFFFFFFFA; // -6
/// Unallocated directory entry
pub const NOSTREAM: u32 = 0xFFFFFFFF; // -1

// Object types in storage (from AAF specifications)
/// Empty directory entry
pub const STGTY_EMPTY: u8 = 0;
/// Element is a storage object
pub const STGTY_STORAGE: u8 = 1;
/// Element is a stream object
pub const STGTY_STREAM: u8 = 2;
/// Element is an ILockBytes object
pub const STGTY_LOCKBYTES: u8 = 3;
/// Element is an IPropertyStorage object
pub const STGTY_PROPERTY: u8 = 4;
/// Element is a root storage
pub const STGTY_ROOT: u8 = 5;

/// Unknown size for a stream (used when size is not known in advance)
pub const UNKNOWN_SIZE: u32 = 0x7FFFFFFF;

// Property types (`VT_*` numeric type tags).
// The numbering is not contiguous: 15 and 32..=63 have no constant here.
// `VT_VECTOR` (0x1000) is a single high bit rather than a sequential code —
// presumably it is OR-ed onto a base type tag; verify against the property
// parser that consumes these values.
pub const VT_EMPTY: u16 = 0;
pub const VT_NULL: u16 = 1;
pub const VT_I2: u16 = 2;
pub const VT_I4: u16 = 3;
pub const VT_R4: u16 = 4;
pub const VT_R8: u16 = 5;
pub const VT_CY: u16 = 6;
pub const VT_DATE: u16 = 7;
pub const VT_BSTR: u16 = 8;
pub const VT_DISPATCH: u16 = 9;
pub const VT_ERROR: u16 = 10;
pub const VT_BOOL: u16 = 11;
pub const VT_VARIANT: u16 = 12;
pub const VT_UNKNOWN: u16 = 13;
pub const VT_DECIMAL: u16 = 14;
pub const VT_I1: u16 = 16;
pub const VT_UI1: u16 = 17;
pub const VT_UI2: u16 = 18;
pub const VT_UI4: u16 = 19;
pub const VT_I8: u16 = 20;
pub const VT_UI8: u16 = 21;
pub const VT_INT: u16 = 22;
pub const VT_UINT: u16 = 23;
pub const VT_VOID: u16 = 24;
pub const VT_HRESULT: u16 = 25;
pub const VT_PTR: u16 = 26;
pub const VT_SAFEARRAY: u16 = 27;
pub const VT_CARRAY: u16 = 28;
pub const VT_USERDEFINED: u16 = 29;
pub const VT_LPSTR: u16 = 30;
pub const VT_LPWSTR: u16 = 31;
pub const VT_FILETIME: u16 = 64;
pub const VT_BLOB: u16 = 65;
pub const VT_STREAM: u16 = 66;
pub const VT_STORAGE: u16 = 67;
pub const VT_STREAMED_OBJECT: u16 = 68;
pub const VT_STORED_OBJECT: u16 = 69;
pub const VT_BLOB_OBJECT: u16 = 70;
pub const VT_CF: u16 = 71;
pub const VT_CLSID: u16 = 72;
pub const VT_VECTOR: u16 = 0x1000;

/// Common document type: Microsoft Word (class identifier in its textual,
/// hyphen-separated hexadecimal form)
pub const WORD_CLSID: &str = "00020900-0000-0000-C000-000000000046";

// PowerPoint Binary File Format (MS-PPT) constants

/// PPT record types (based on POI RecordTypes enum).
///
/// Each variant's discriminant is the numeric record type ID, so the enum can
/// be converted to its ID with [`PptRecordType::as_u16`] and built from a raw
/// ID with `PptRecordType::from(id)`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u16)]
pub enum PptRecordType {
    /// Unknown record type (also used for every ID without a dedicated variant)
    Unknown = 0,
    /// Document record
    Document = 1000,
    /// Document atom record
    DocumentAtom = 1001,
    /// End document record
    EndDocument = 1002,
    /// Slide record
    Slide = 1006,
    /// Slide atom record
    SlideAtom = 1007,
    /// Notes record
    Notes = 1008,
    /// Notes atom record
    NotesAtom = 1009,
    /// Environment record
    Environment = 1010,
    /// Slide persist atom record
    SlidePersistAtom = 1011,
    /// Main master record
    MainMaster = 1016,
    /// Slide list with text record
    SlideListWithText = 4080,
    /// Persist pointer holder record (ID 6002 is also decoded to this variant)
    PersistPtrHolder = 6001,
    /// Slide show slide info atom
    SSSlideInfoAtom = 1017,
    /// VBA info record
    VBAInfo = 1023,
    /// VBA info atom record
    VBAInfoAtom = 1024,
    /// External object list record
    ExObjList = 1033,
    /// External object list atom record
    ExObjListAtom = 1034,
    /// PP drawing group record
    PPDrawingGroup = 1035,
    /// PP drawing record
    PPDrawing = 1036,
    /// OE placeholder atom record (placeholder data)
    OEPlaceholderAtom = 3011,
    /// Text header atom record
    TextHeaderAtom = 3999,
    /// Text characters atom record
    TextCharsAtom = 4000,
    /// Text bytes atom record
    TextBytesAtom = 4008,
    /// Text special info atom record
    TextSpecInfoAtom = 4010,
    /// Style text prop atom record
    StyleTextPropAtom = 4001,
    /// Master text prop atom record
    MasterTextPropAtom = 4002,
    /// Text master style atom record
    TxMasterStyleAtom = 4003,
    /// Text CF style atom record
    TxCFStyleAtom = 4004,
    /// Text PF style atom record
    TxPFStyleAtom = 4005,
    /// Text ruler atom record
    TextRulerAtom = 4006,
    /// Font entity atom record
    FontEntityAtom = 4023,
    /// CString record
    CString = 4026,
    /// Headers footers container record
    HeadersFooters = 4057,
    /// Headers footers atom record
    HeadersFootersAtom = 4058,
    /// Interactive info record
    InteractiveInfo = 4082,
    /// Interactive info atom record
    InteractiveInfoAtom = 4083,
    /// User edit atom record
    UserEditAtom = 4085,
    /// Current user atom record
    CurrentUserAtom = 4086,
    /// Date time MC atom record
    DateTimeMCAtom = 4087,
    /// Animation info record
    AnimationInfo = 4116,
    /// Animation info atom record
    AnimationInfoAtom = 4081,
    /// Comment 2000 record
    Comment2000 = 12000,
    /// Comment 2000 atom record
    Comment2000Atom = 12001,
}

impl From<u16> for PptRecordType {
    /// Decode a raw record type ID.
    ///
    /// The conversion never fails: any ID without a dedicated variant becomes
    /// [`PptRecordType::Unknown`]. IDs 6001 and 6002 both decode to
    /// [`PptRecordType::PersistPtrHolder`], so converting back with `as_u16`
    /// yields 6001 for either input.
    fn from(value: u16) -> Self {
        // Arms are listed in ascending ID order; the patterns are disjoint, so
        // the order has no effect on the result.
        match value {
            0 => PptRecordType::Unknown,
            1000 => PptRecordType::Document,
            1001 => PptRecordType::DocumentAtom,
            1002 => PptRecordType::EndDocument,
            1006 => PptRecordType::Slide,
            1007 => PptRecordType::SlideAtom,
            1008 => PptRecordType::Notes,
            1009 => PptRecordType::NotesAtom,
            1010 => PptRecordType::Environment,
            1011 => PptRecordType::SlidePersistAtom,
            1016 => PptRecordType::MainMaster,
            1017 => PptRecordType::SSSlideInfoAtom,
            1023 => PptRecordType::VBAInfo,
            1024 => PptRecordType::VBAInfoAtom,
            1033 => PptRecordType::ExObjList,
            1034 => PptRecordType::ExObjListAtom,
            1035 => PptRecordType::PPDrawingGroup,
            1036 => PptRecordType::PPDrawing,
            3011 => PptRecordType::OEPlaceholderAtom,
            3999 => PptRecordType::TextHeaderAtom,
            4000 => PptRecordType::TextCharsAtom,
            4001 => PptRecordType::StyleTextPropAtom,
            4002 => PptRecordType::MasterTextPropAtom,
            4003 => PptRecordType::TxMasterStyleAtom,
            4004 => PptRecordType::TxCFStyleAtom,
            4005 => PptRecordType::TxPFStyleAtom,
            4006 => PptRecordType::TextRulerAtom,
            4008 => PptRecordType::TextBytesAtom,
            4010 => PptRecordType::TextSpecInfoAtom,
            4023 => PptRecordType::FontEntityAtom,
            4026 => PptRecordType::CString,
            4057 => PptRecordType::HeadersFooters,
            4058 => PptRecordType::HeadersFootersAtom,
            4080 => PptRecordType::SlideListWithText,
            4081 => PptRecordType::AnimationInfoAtom,
            4082 => PptRecordType::InteractiveInfo,
            4083 => PptRecordType::InteractiveInfoAtom,
            4085 => PptRecordType::UserEditAtom,
            4086 => PptRecordType::CurrentUserAtom,
            4087 => PptRecordType::DateTimeMCAtom,
            4116 => PptRecordType::AnimationInfo,
            6001 | 6002 => PptRecordType::PersistPtrHolder, // Both values are used
            12000 => PptRecordType::Comment2000,
            12001 => PptRecordType::Comment2000Atom,
            _ => PptRecordType::Unknown,
        }
    }
}

impl PptRecordType {
    /// Get the u16 value of this record type.
    ///
    /// Returns the variant's declared discriminant. A plain `as` cast is used
    /// because the enum is field-less and `#[repr(u16)]`; no `unsafe` is
    /// required for this conversion.
    pub const fn as_u16(self) -> u16 {
        self as u16
    }
}

/// Escher record types (MS-ODRAW format).
///
/// Each variant's discriminant is the numeric record ID. There is no
/// "unknown" variant: decoding an unrecognized ID via `From<u16>` yields
/// [`EscherRecordType::Container`].
///
/// NOTE(review): MS-ODRAW appears to assign 0xF00F to the child anchor,
/// 0xF010 to the client anchor and 0xF011 to the client data record, which
/// differs from the discriminants below — confirm against the specification
/// and the callers before changing any value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u16)]
pub enum EscherRecordType {
    /// Container record
    Container = 0xF000,
    /// Shape record
    Shape = 0xF004,
    /// Text box record
    TextBox = 0xF00C,
    /// Client text box record
    ClientTextBox = 0xF00D,
    /// Child anchor record
    ChildAnchor = 0xF00E,
    /// Client anchor record
    ClientAnchor = 0xF00F,
    /// Client data record
    ClientData = 0xF010,
    /// Properties record
    Properties = 0xF011,
    /// Transform record
    Transform = 0xF012,
    /// Text record
    Text = 0xF013,
    /// Placeholder data record
    PlaceholderData = 0xF014,
}

impl From<u16> for EscherRecordType {
    /// Decode a raw Escher record ID.
    ///
    /// The conversion never fails. Any ID without a dedicated variant is
    /// reported as [`EscherRecordType::Container`], so a `Container` result
    /// does not by itself prove the input was `0xF000`.
    fn from(value: u16) -> Self {
        match value {
            0xF000 => EscherRecordType::Container,
            0xF004 => EscherRecordType::Shape,
            0xF00C => EscherRecordType::TextBox,
            0xF00D => EscherRecordType::ClientTextBox,
            0xF00E => EscherRecordType::ChildAnchor,
            0xF00F => EscherRecordType::ClientAnchor,
            0xF010 => EscherRecordType::ClientData,
            0xF011 => EscherRecordType::Properties,
            0xF012 => EscherRecordType::Transform,
            0xF013 => EscherRecordType::Text,
            0xF014 => EscherRecordType::PlaceholderData,
            _ => EscherRecordType::Container, // Default fallback
        }
    }
}

impl EscherRecordType {
    /// Get the u16 value of this record type.
    ///
    /// Returns the variant's declared discriminant. A plain `as` cast is used
    /// because the enum is field-less and `#[repr(u16)]`; no `unsafe` is
    /// required for this conversion.
    pub const fn as_u16(self) -> u16 {
        self as u16
    }
}

// Additional Escher/MS-ODRAW constants

/// Mask for the Escher record version bits.
///
/// The value `0x0FFF` selects the low 12 bits of a `u16` and is identical to
/// [`ESCHER_INSTANCE_MASK`].
/// NOTE(review): MS-ODRAW appears to place the version in the low 4 bits and
/// the instance in the high 12 bits of the first header word — confirm how
/// callers shift the value before applying this mask.
pub const ESCHER_VERSION_MASK: u16 = 0x0FFF;

/// Mask for the Escher record instance bits.
///
/// The value `0x0FFF` selects the low 12 bits of a `u16`.
/// NOTE(review): presumably applied after the instance field has been shifted
/// down — verify against the record header parser.
pub const ESCHER_INSTANCE_MASK: u16 = 0x0FFF;

/// Escher record header size in bytes
pub const ESCHER_HEADER_SIZE: usize = 8;

/// Minimum size for a valid Escher record (a record consisting of only its
/// header, i.e. equal to [`ESCHER_HEADER_SIZE`])
pub const ESCHER_MIN_RECORD_SIZE: usize = ESCHER_HEADER_SIZE;

/// Escher container record flag (has children).
///
/// The value is `0x000F`.
/// NOTE(review): presumably compared against the record's version bits —
/// confirm against the code that uses it.
pub const ESCHER_CONTAINER_FLAG: u16 = 0x000F;

/// Shape types in Escher format.
///
/// Each variant's discriminant is the numeric shape type ID. Decoding an ID
/// without a dedicated variant via `From<u16>` yields
/// [`EscherShapeType::NotPrimitive`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u16)]
pub enum EscherShapeType {
    /// Not a primitive shape (also used for every ID without a dedicated variant)
    NotPrimitive = 0,
    /// Rectangle
    Rectangle = 1,
    /// Round rectangle
    RoundRectangle = 2,
    /// Oval
    Oval = 3,
    /// Diamond
    Diamond = 4,
    /// Isosceles triangle
    Triangle = 5,
    /// Right triangle
    RightTriangle = 6,
    /// Parallelogram
    Parallelogram = 7,
    /// Trapezoid
    Trapezoid = 8,
    /// Hexagon
    Hexagon = 9,
    /// Octagon
    Octagon = 10,
    /// Plus sign
    Plus = 11,
    /// Star
    Star = 12,
    /// Arrow
    Arrow = 13,
    /// Thick arrow
    ThickArrow = 14,
    /// Home plate
    HomePlate = 15,
    /// Cube
    Cube = 16,
    /// Balloon
    Balloon = 17,
    /// Seal
    Seal = 18,
    /// Arc
    Arc = 19,
    /// Line
    Line = 20,
    /// Plaque
    Plaque = 21,
    /// Can
    Can = 22,
    /// Donut
    Donut = 23,
    /// Text simple
    TextSimple = 24,
    /// Text octagon
    TextOctagon = 25,
    /// Text hexagon
    TextHexagon = 26,
    /// Text curve
    TextCurve = 27,
    /// Text wave
    TextWave = 28,
    /// Text ring
    TextRing = 29,
    /// Text on curve
    TextOnCurve = 30,
    /// Text on ring
    TextOnRing = 31,
    /// Straight connector 1
    StraightConnector1 = 32,
    /// Bent connector 2
    BentConnector2 = 33,
    /// Bent connector 3
    BentConnector3 = 34,
    /// Bent connector 4
    BentConnector4 = 35,
    /// Bent connector 5
    BentConnector5 = 36,
    /// Curved connector 2
    CurvedConnector2 = 37,
    /// Curved connector 3
    CurvedConnector3 = 38,
    /// Curved connector 4
    CurvedConnector4 = 39,
    /// Curved connector 5
    CurvedConnector5 = 40,
    /// Callout 1
    Callout1 = 41,
    /// Callout 2
    Callout2 = 42,
    /// Callout 3
    Callout3 = 43,
    /// Accent callout 1
    AccentCallout1 = 44,
    /// Accent callout 2
    AccentCallout2 = 45,
    /// Accent callout 3
    AccentCallout3 = 46,
    /// Border callout 1
    BorderCallout1 = 47,
    /// Border callout 2
    BorderCallout2 = 48,
    /// Border callout 3
    BorderCallout3 = 49,
    /// Accent border callout 1
    AccentBorderCallout1 = 50,
    /// Accent border callout 2
    AccentBorderCallout2 = 51,
    /// Accent border callout 3
    AccentBorderCallout3 = 52,
    /// Custom shape
    Custom = 255,
}

impl From<u16> for EscherShapeType {
    /// Decode a raw shape type ID.
    ///
    /// The conversion never fails. IDs 0..=52 and 255 map to their dedicated
    /// variants; every other value is reported as
    /// [`EscherShapeType::NotPrimitive`].
    fn from(value: u16) -> Self {
        match value {
            0 => EscherShapeType::NotPrimitive,
            1 => EscherShapeType::Rectangle,
            2 => EscherShapeType::RoundRectangle,
            3 => EscherShapeType::Oval,
            4 => EscherShapeType::Diamond,
            5 => EscherShapeType::Triangle,
            6 => EscherShapeType::RightTriangle,
            7 => EscherShapeType::Parallelogram,
            8 => EscherShapeType::Trapezoid,
            9 => EscherShapeType::Hexagon,
            10 => EscherShapeType::Octagon,
            11 => EscherShapeType::Plus,
            12 => EscherShapeType::Star,
            13 => EscherShapeType::Arrow,
            14 => EscherShapeType::ThickArrow,
            15 => EscherShapeType::HomePlate,
            16 => EscherShapeType::Cube,
            17 => EscherShapeType::Balloon,
            18 => EscherShapeType::Seal,
            19 => EscherShapeType::Arc,
            20 => EscherShapeType::Line,
            21 => EscherShapeType::Plaque,
            22 => EscherShapeType::Can,
            23 => EscherShapeType::Donut,
            24 => EscherShapeType::TextSimple,
            25 => EscherShapeType::TextOctagon,
            26 => EscherShapeType::TextHexagon,
            27 => EscherShapeType::TextCurve,
            28 => EscherShapeType::TextWave,
            29 => EscherShapeType::TextRing,
            30 => EscherShapeType::TextOnCurve,
            31 => EscherShapeType::TextOnRing,
            32 => EscherShapeType::StraightConnector1,
            33 => EscherShapeType::BentConnector2,
            34 => EscherShapeType::BentConnector3,
            35 => EscherShapeType::BentConnector4,
            36 => EscherShapeType::BentConnector5,
            37 => EscherShapeType::CurvedConnector2,
            38 => EscherShapeType::CurvedConnector3,
            39 => EscherShapeType::CurvedConnector4,
            40 => EscherShapeType::CurvedConnector5,
            41 => EscherShapeType::Callout1,
            42 => EscherShapeType::Callout2,
            43 => EscherShapeType::Callout3,
            44 => EscherShapeType::AccentCallout1,
            45 => EscherShapeType::AccentCallout2,
            46 => EscherShapeType::AccentCallout3,
            47 => EscherShapeType::BorderCallout1,
            48 => EscherShapeType::BorderCallout2,
            49 => EscherShapeType::BorderCallout3,
            50 => EscherShapeType::AccentBorderCallout1,
            51 => EscherShapeType::AccentBorderCallout2,
            52 => EscherShapeType::AccentBorderCallout3,
            255 => EscherShapeType::Custom,
            // Anything else has no dedicated variant.
            _ => EscherShapeType::NotPrimitive,
        }
    }
}

impl EscherShapeType {
    /// Get the u16 value of this shape type.
    ///
    /// Returns the variant's declared discriminant. A plain `as` cast is used
    /// because the enum is field-less and `#[repr(u16)]`; no `unsafe` is
    /// required for this conversion.
    pub const fn as_u16(self) -> u16 {
        self as u16
    }
}