Skip to main content

xlsbye_formula/
ptg.rs

1use std::char::decode_utf16;
2
3use thiserror::Error;
4
/// Operand class attached to class-bearing Ptg tokens.
///
/// The parser derives it from which of a token's three opcode variants was
/// seen in the stream (for example `0x24` / `0x44` / `0x64` for `Ref`).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PtgClass {
    /// Selected by the first opcode of a triple (e.g. `0x24`).
    Reference,
    /// Selected by the second opcode of a triple (e.g. `0x44`).
    Value,
    /// Selected by the third opcode of a triple (e.g. `0x64`).
    Array,
}
11
/// A `PtgAttr` token (opcode `0x19`): a subtype byte plus its raw payload.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PtgAttrToken {
    /// Subtype byte that immediately follows the `0x19` opcode.
    pub subtype: u8,
    /// Undecoded payload bytes that follow the subtype byte.
    pub data: Vec<u8>,
}

impl PtgAttrToken {
    /// Subtype value that `is_sum` tests for.
    const SUM_SUBTYPE: u8 = 0x10;

    /// Returns `true` when this attribute's subtype is `0x10`.
    pub fn is_sum(&self) -> bool {
        matches!(self.subtype, Self::SUM_SUBTYPE)
    }
}
23
/// An extended token (opcode `0x18`): a subtype byte plus its raw payload.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PtgExtendedToken {
    /// Subtype byte that immediately follows the `0x18` opcode.
    pub subtype: u8,
    /// Undecoded payload bytes that follow the subtype byte.
    pub data: Vec<u8>,
}
29
30#[derive(Debug, Clone, PartialEq)]
31pub enum PtgToken {
32    Exp { data: [u8; 4] },
33    Tbl { data: [u8; 4] },
34
35    Add,
36    Sub,
37    Mul,
38    Div,
39    Power,
40    Concat,
41    Lt,
42    Le,
43    Eq,
44    Ge,
45    Gt,
46    Ne,
47    Isect,
48    Union,
49    Range,
50
51    UPlus,
52    UMinus,
53    Percent,
54    Paren,
55    MissArg,
56
57    Str(String),
58    Extended(PtgExtendedToken),
59    Attr(PtgAttrToken),
60    Err(u8),
61    Bool(bool),
62    Int(u16),
63    Num(f64),
64
65    Array { class: PtgClass, data: [u8; 14] },
66    Func { class: PtgClass, iftab: u16 },
67    FuncVar { class: PtgClass, argc: u8, iftab: u16 },
68    Name { class: PtgClass, name_index: u32 },
69    Ref {
70        class: PtgClass,
71        row: u32,
72        col: u16,
73    },
74    Area {
75        class: PtgClass,
76        first_row: u32,
77        last_row: u32,
78        first_col: u16,
79        last_col: u16,
80    },
81    MemArea {
82        class: PtgClass,
83        reserved: u32,
84        cce: u16,
85    },
86    MemErr {
87        class: PtgClass,
88        reserved: u32,
89        cce: u16,
90    },
91    MemNoMem {
92        class: PtgClass,
93        reserved: u32,
94        cce: u16,
95    },
96    MemFunc {
97        class: PtgClass,
98        rgce: Vec<u8>,
99    },
100    RefN {
101        class: PtgClass,
102        row: u32,
103        col: u16,
104    },
105    AreaN {
106        class: PtgClass,
107        first_row: u32,
108        last_row: u32,
109        first_col: u16,
110        last_col: u16,
111    },
112    RefErr {
113        class: PtgClass,
114        data: [u8; 6],
115    },
116    AreaErr {
117        class: PtgClass,
118        data: [u8; 12],
119    },
120    NameX {
121        class: PtgClass,
122        ixti: u16,
123        name_index: u32,
124    },
125    Ref3d {
126        class: PtgClass,
127        ixti: u16,
128        row: u32,
129        col: u16,
130    },
131    Area3d {
132        class: PtgClass,
133        ixti: u16,
134        first_row: u32,
135        last_row: u32,
136        first_col: u16,
137        last_col: u16,
138    },
139    RefErr3d {
140        class: PtgClass,
141        ixti: u16,
142        data: [u8; 6],
143    },
144    AreaErr3d {
145        class: PtgClass,
146        ixti: u16,
147        data: [u8; 12],
148    },
149}
150
151#[derive(Debug, Error, Clone, PartialEq, Eq)]
152pub enum PtgParseError {
153    #[error(
154        "unexpected EOF while reading {context}: need {needed} byte(s), have {remaining} byte(s)"
155    )]
156    UnexpectedEof {
157        context: &'static str,
158        needed: usize,
159        remaining: usize,
160    },
161
162    #[error("unknown Ptg opcode 0x{opcode:02X} at byte offset {offset}")]
163    UnknownOpcode { opcode: u8, offset: usize },
164
165    #[error("unknown PtgAttr subtype 0x{subtype:02X} at byte offset {offset}")]
166    UnknownAttrSubtype { subtype: u8, offset: usize },
167
168    #[error("unknown PtgExtended subtype 0x{subtype:02X} at byte offset {offset}")]
169    UnknownExtendedSubtype { subtype: u8, offset: usize },
170
171    #[error("invalid UTF-16 string payload: {reason}")]
172    InvalidUtf16 { reason: String },
173
174    #[error("string payload length overflow")]
175    LengthOverflow,
176}
177
178pub fn parse_ptg_tokens(rgce: &[u8]) -> Result<Vec<PtgToken>, PtgParseError> {
179    let mut parser = PtgParser { rgce, pos: 0 };
180    let mut tokens = Vec::new();
181
182    while !parser.is_eof() {
183        tokens.push(parser.parse_next()?);
184    }
185
186    Ok(tokens)
187}
188
189struct PtgParser<'a> {
190    rgce: &'a [u8],
191    pos: usize,
192}
193
194impl<'a> PtgParser<'a> {
195    fn is_eof(&self) -> bool {
196        self.pos >= self.rgce.len()
197    }
198
199    fn parse_next(&mut self) -> Result<PtgToken, PtgParseError> {
200        let opcode_offset = self.pos;
201        let opcode = self.read_u8("Ptg opcode")?;
202
203        let token = match opcode {
204            0x01 => PtgToken::Exp {
205                data: self.read_array("PtgExp data")?,
206            },
207            0x02 => PtgToken::Tbl {
208                data: self.read_array("PtgTbl data")?,
209            },
210
211            0x03 => PtgToken::Add,
212            0x04 => PtgToken::Sub,
213            0x05 => PtgToken::Mul,
214            0x06 => PtgToken::Div,
215            0x07 => PtgToken::Power,
216            0x08 => PtgToken::Concat,
217            0x09 => PtgToken::Lt,
218            0x0A => PtgToken::Le,
219            0x0B => PtgToken::Eq,
220            0x0C => PtgToken::Ge,
221            0x0D => PtgToken::Gt,
222            0x0E => PtgToken::Ne,
223            0x0F => PtgToken::Isect,
224            0x10 => PtgToken::Union,
225            0x11 => PtgToken::Range,
226
227            0x12 => PtgToken::UPlus,
228            0x13 => PtgToken::UMinus,
229            0x14 => PtgToken::Percent,
230            0x15 => PtgToken::Paren,
231            0x16 => PtgToken::MissArg,
232
233            0x17 => {
234                let cch = self.read_u16("PtgStr.cch")? as usize;
235                let utf16_len = cch.checked_mul(2).ok_or(PtgParseError::LengthOverflow)?;
236                let payload = self.read_slice(utf16_len, "PtgStr UTF-16 payload")?;
237                let text = decode_utf16le(payload)?;
238                PtgToken::Str(text)
239            }
240            0x18 => {
241                let subtype_offset = self.pos;
242                let subtype = self.read_u8("PtgExtended subtype")?;
243                let payload_len = match subtype {
244                    0x19 => 12,
245                    0x1D => 4,
246                    _ => {
247                        return Err(PtgParseError::UnknownExtendedSubtype {
248                            subtype,
249                            offset: subtype_offset,
250                        })
251                    }
252                };
253                let data = self.read_vec(payload_len, "PtgExtended payload")?;
254                PtgToken::Extended(PtgExtendedToken { subtype, data })
255            }
256            0x19 => {
257                let subtype_offset = self.pos;
258                let subtype = self.read_u8("PtgAttr subtype")?;
259                let payload_len = match subtype {
260                    0x01 | 0x02 | 0x08 | 0x10 | 0x20 | 0x21 | 0x40 | 0x41 | 0x80 => 2,
261                    0x04 => 10,
262                    _ => {
263                        return Err(PtgParseError::UnknownAttrSubtype {
264                            subtype,
265                            offset: subtype_offset,
266                        })
267                    }
268                };
269                let data = self.read_vec(payload_len, "PtgAttr payload")?;
270                PtgToken::Attr(PtgAttrToken { subtype, data })
271            }
272
273            0x1C => PtgToken::Err(self.read_u8("PtgErr code")?),
274            0x1D => PtgToken::Bool(self.read_u8("PtgBool value")? != 0),
275            0x1E => PtgToken::Int(self.read_u16("PtgInt value")?),
276            0x1F => PtgToken::Num(self.read_f64("PtgNum value")?),
277
278            op if matches_class(op, 0x20, 0x40, 0x60) => {
279                let class = class_from_opcode(op, 0x20, 0x40, 0x60, opcode_offset)?;
280                PtgToken::Array {
281                    class,
282                    data: self.read_array("PtgArray payload")?,
283                }
284            }
285            op if matches_class(op, 0x21, 0x41, 0x61) => {
286                let class = class_from_opcode(op, 0x21, 0x41, 0x61, opcode_offset)?;
287                PtgToken::Func {
288                    class,
289                    iftab: self.read_u16("PtgFunc iftab")?,
290                }
291            }
292            op if matches_class(op, 0x22, 0x42, 0x62) => {
293                let class = class_from_opcode(op, 0x22, 0x42, 0x62, opcode_offset)?;
294                let argc = self.read_u8("PtgFuncVar argc")?;
295                let iftab = self.read_u16("PtgFuncVar iftab")?;
296                PtgToken::FuncVar { class, argc, iftab }
297            }
298            op if matches_class(op, 0x23, 0x43, 0x63) => {
299                let class = class_from_opcode(op, 0x23, 0x43, 0x63, opcode_offset)?;
300                PtgToken::Name {
301                    class,
302                    name_index: self.read_u32("PtgName index")?,
303                }
304            }
305            op if matches_class(op, 0x24, 0x44, 0x64) => {
306                let class = class_from_opcode(op, 0x24, 0x44, 0x64, opcode_offset)?;
307                let row = self.read_u32("PtgRef row")?;
308                let col = self.read_u16("PtgRef col")?;
309                PtgToken::Ref { class, row, col }
310            }
311            op if matches_class(op, 0x25, 0x45, 0x65) => {
312                let class = class_from_opcode(op, 0x25, 0x45, 0x65, opcode_offset)?;
313                let first_row = self.read_u32("PtgArea first_row")?;
314                let last_row = self.read_u32("PtgArea last_row")?;
315                let first_col = self.read_u16("PtgArea first_col")?;
316                let last_col = self.read_u16("PtgArea last_col")?;
317                PtgToken::Area {
318                    class,
319                    first_row,
320                    last_row,
321                    first_col,
322                    last_col,
323                }
324            }
325            op if matches_class(op, 0x26, 0x46, 0x66) => {
326                let class = class_from_opcode(op, 0x26, 0x46, 0x66, opcode_offset)?;
327                let reserved = self.read_u32("PtgMemArea reserved")?;
328                let cce = self.read_u16("PtgMemArea cce")?;
329                PtgToken::MemArea {
330                    class,
331                    reserved,
332                    cce,
333                }
334            }
335            op if matches_class(op, 0x27, 0x47, 0x67) => {
336                let class = class_from_opcode(op, 0x27, 0x47, 0x67, opcode_offset)?;
337                let reserved = self.read_u32("PtgMemErr reserved")?;
338                let cce = self.read_u16("PtgMemErr cce")?;
339                PtgToken::MemErr {
340                    class,
341                    reserved,
342                    cce,
343                }
344            }
345            op if matches_class(op, 0x28, 0x48, 0x68) => {
346                let class = class_from_opcode(op, 0x28, 0x48, 0x68, opcode_offset)?;
347                let reserved = self.read_u32("PtgMemNoMem reserved")?;
348                let cce = self.read_u16("PtgMemNoMem cce")?;
349                PtgToken::MemNoMem {
350                    class,
351                    reserved,
352                    cce,
353                }
354            }
355            op if matches_class(op, 0x29, 0x49, 0x69) => {
356                let class = class_from_opcode(op, 0x29, 0x49, 0x69, opcode_offset)?;
357                let cce = usize::from(self.read_u16("PtgMemFunc cce")?);
358                let rgce = self.read_vec(cce, "PtgMemFunc nested rgce")?;
359                PtgToken::MemFunc { class, rgce }
360            }
361
362            op if matches_class(op, 0x2A, 0x4A, 0x6A) => {
363                let class = class_from_opcode(op, 0x2A, 0x4A, 0x6A, opcode_offset)?;
364                PtgToken::RefErr {
365                    class,
366                    data: self.read_array("PtgRefErr payload")?,
367                }
368            }
369            op if matches_class(op, 0x2B, 0x4B, 0x6B) => {
370                let class = class_from_opcode(op, 0x2B, 0x4B, 0x6B, opcode_offset)?;
371                PtgToken::AreaErr {
372                    class,
373                    data: self.read_array("PtgAreaErr payload")?,
374                }
375            }
376
377            op if matches_class(op, 0x2C, 0x4C, 0x6C) => {
378                let class = class_from_opcode(op, 0x2C, 0x4C, 0x6C, opcode_offset)?;
379                let row = self.read_u32("PtgRefN row")?;
380                let col = self.read_u16("PtgRefN col")?;
381                PtgToken::RefN { class, row, col }
382            }
383            op if matches_class(op, 0x2D, 0x4D, 0x6D) => {
384                let class = class_from_opcode(op, 0x2D, 0x4D, 0x6D, opcode_offset)?;
385                let first_row = self.read_u32("PtgAreaN first_row")?;
386                let last_row = self.read_u32("PtgAreaN last_row")?;
387                let first_col = self.read_u16("PtgAreaN first_col")?;
388                let last_col = self.read_u16("PtgAreaN last_col")?;
389                PtgToken::AreaN {
390                    class,
391                    first_row,
392                    last_row,
393                    first_col,
394                    last_col,
395                }
396            }
397
398            op if matches_class(op, 0x39, 0x59, 0x79) => {
399                let class = class_from_opcode(op, 0x39, 0x59, 0x79, opcode_offset)?;
400                let ixti = self.read_u16("PtgNameX ixti")?;
401                let name_index = self.read_u32("PtgNameX index")?;
402                PtgToken::NameX {
403                    class,
404                    ixti,
405                    name_index,
406                }
407            }
408            op if matches_class(op, 0x3A, 0x5A, 0x7A) => {
409                let class = class_from_opcode(op, 0x3A, 0x5A, 0x7A, opcode_offset)?;
410                let ixti = self.read_u16("PtgRef3d ixti")?;
411                let row = self.read_u32("PtgRef3d row")?;
412                let col = self.read_u16("PtgRef3d col")?;
413                PtgToken::Ref3d {
414                    class,
415                    ixti,
416                    row,
417                    col,
418                }
419            }
420            op if matches_class(op, 0x3B, 0x5B, 0x7B) => {
421                let class = class_from_opcode(op, 0x3B, 0x5B, 0x7B, opcode_offset)?;
422                let ixti = self.read_u16("PtgArea3d ixti")?;
423                let first_row = self.read_u32("PtgArea3d first_row")?;
424                let last_row = self.read_u32("PtgArea3d last_row")?;
425                let first_col = self.read_u16("PtgArea3d first_col")?;
426                let last_col = self.read_u16("PtgArea3d last_col")?;
427                PtgToken::Area3d {
428                    class,
429                    ixti,
430                    first_row,
431                    last_row,
432                    first_col,
433                    last_col,
434                }
435            }
436            op if matches_class(op, 0x3C, 0x5C, 0x7C) => {
437                let class = class_from_opcode(op, 0x3C, 0x5C, 0x7C, opcode_offset)?;
438                let ixti = self.read_u16("PtgRefErr3d ixti")?;
439                let data = self.read_array("PtgRefErr3d payload")?;
440                PtgToken::RefErr3d { class, ixti, data }
441            }
442            op if matches_class(op, 0x3D, 0x5D, 0x7D) => {
443                let class = class_from_opcode(op, 0x3D, 0x5D, 0x7D, opcode_offset)?;
444                let ixti = self.read_u16("PtgAreaErr3d ixti")?;
445                let data = self.read_array("PtgAreaErr3d payload")?;
446                PtgToken::AreaErr3d { class, ixti, data }
447            }
448
449            _ => {
450                return Err(PtgParseError::UnknownOpcode {
451                    opcode,
452                    offset: opcode_offset,
453                })
454            }
455        };
456
457        Ok(token)
458    }
459
460    fn read_u8(&mut self, context: &'static str) -> Result<u8, PtgParseError> {
461        let bytes = self.read_slice(1, context)?;
462        Ok(bytes[0])
463    }
464
465    fn read_u16(&mut self, context: &'static str) -> Result<u16, PtgParseError> {
466        let bytes = self.read_slice(2, context)?;
467        Ok(u16::from_le_bytes([bytes[0], bytes[1]]))
468    }
469
470    fn read_u32(&mut self, context: &'static str) -> Result<u32, PtgParseError> {
471        let bytes = self.read_slice(4, context)?;
472        Ok(u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
473    }
474
475    fn read_f64(&mut self, context: &'static str) -> Result<f64, PtgParseError> {
476        let bytes = self.read_slice(8, context)?;
477        Ok(f64::from_le_bytes([
478            bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
479        ]))
480    }
481
482    fn read_vec(&mut self, len: usize, context: &'static str) -> Result<Vec<u8>, PtgParseError> {
483        Ok(self.read_slice(len, context)?.to_vec())
484    }
485
486    fn read_array<const N: usize>(
487        &mut self,
488        context: &'static str,
489    ) -> Result<[u8; N], PtgParseError> {
490        let bytes = self.read_slice(N, context)?;
491        let mut out = [0u8; N];
492        out.copy_from_slice(bytes);
493        Ok(out)
494    }
495
496    fn read_slice(&mut self, len: usize, context: &'static str) -> Result<&'a [u8], PtgParseError> {
497        let remaining = self.rgce.len().saturating_sub(self.pos);
498        if remaining < len {
499            return Err(PtgParseError::UnexpectedEof {
500                context,
501                needed: len,
502                remaining,
503            });
504        }
505
506        let start = self.pos;
507        let end = start + len;
508        self.pos = end;
509        Ok(&self.rgce[start..end])
510    }
511}
512
513fn decode_utf16le(bytes: &[u8]) -> Result<String, PtgParseError> {
514    if bytes.len() % 2 != 0 {
515        return Err(PtgParseError::InvalidUtf16 {
516            reason: "odd number of UTF-16 bytes".to_string(),
517        });
518    }
519
520    let mut words = Vec::with_capacity(bytes.len() / 2);
521    for chunk in bytes.chunks_exact(2) {
522        words.push(u16::from_le_bytes([chunk[0], chunk[1]]));
523    }
524
525    let mut result = String::new();
526    for item in decode_utf16(words.into_iter()) {
527        match item {
528            Ok(ch) => result.push(ch),
529            Err(_) => {
530                return Err(PtgParseError::InvalidUtf16 {
531                    reason: "invalid UTF-16 surrogate pair".to_string(),
532                })
533            }
534        }
535    }
536
537    Ok(result)
538}
539
/// Returns `true` when `opcode` equals any of the three class variants
/// (`reference`, `value`, `array`) of a token.
fn matches_class(opcode: u8, reference: u8, value: u8, array: u8) -> bool {
    [reference, value, array].contains(&opcode)
}
543
544fn class_from_opcode(
545    opcode: u8,
546    reference: u8,
547    value: u8,
548    array: u8,
549    offset: usize,
550) -> Result<PtgClass, PtgParseError> {
551    if opcode == reference {
552        return Ok(PtgClass::Reference);
553    }
554    if opcode == value {
555        return Ok(PtgClass::Value);
556    }
557    if opcode == array {
558        return Ok(PtgClass::Array);
559    }
560
561    Err(PtgParseError::UnknownOpcode { opcode, offset })
562}
563
#[cfg(test)]
mod tests {
    use super::*;

    /// Two `0x1E` integer tokens followed by opcode `0x03` decode to
    /// `Int(1)`, `Int(2)`, `Add`.
    #[test]
    fn parse_simple_arithmetic_tokens() {
        let stream: [u8; 7] = [
            0x1E, 0x01, 0x00, // Int(1)
            0x1E, 0x02, 0x00, // Int(2)
            0x03, // Add
        ];
        let expected = vec![PtgToken::Int(1), PtgToken::Int(2), PtgToken::Add];

        let parsed = parse_ptg_tokens(&stream).expect("arithmetic rgce should parse");

        assert_eq!(parsed, expected);
    }

    /// Snapshot of a `Ref`, an `Area` and a `Func` token, all using their
    /// reference-class opcodes.
    #[test]
    fn parse_core_reference_tokens_snapshot() {
        let rgce = [
            // Ref: opcode, u32 row, u16 col
            0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0,
            // Area: opcode, u32 first_row, u32 last_row, u16 first_col, u16 last_col
            0x25, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x01, 0xC0,
            // Func: opcode, u16 iftab
            0x21, 0x04, 0x00,
        ];

        let tokens = parse_ptg_tokens(&rgce).expect("reference tokens should parse");

        insta::assert_debug_snapshot!(tokens, @r###"
        [
            Ref {
                class: Reference,
                row: 0,
                col: 49152,
            },
            Area {
                class: Reference,
                first_row: 0,
                last_row: 9,
                first_col: 49152,
                last_col: 49153,
            },
            Func {
                class: Reference,
                iftab: 4,
            },
        ]
        "###);
    }
}