1use std::char::decode_utf16;
2
3use thiserror::Error;
4
/// Operand class attached to class-sensitive tokens.
///
/// The parser picks the class from which of a token's three opcode variants
/// was encountered: the first maps to `Reference`, the second to `Value`
/// and the third to `Array`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PtgClass {
    /// The token was encoded with its reference-class opcode.
    Reference,
    /// The token was encoded with its value-class opcode.
    Value,
    /// The token was encoded with its array-class opcode.
    Array,
}
11
/// A decoded `PtgAttr` token: the subtype byte plus its raw payload.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PtgAttrToken {
    /// Byte that immediately follows the `PtgAttr` opcode and selects the
    /// attribute kind.
    pub subtype: u8,
    /// Undecoded payload bytes that follow the subtype byte.
    pub data: Vec<u8>,
}
17
18impl PtgAttrToken {
19 pub fn is_sum(&self) -> bool {
20 self.subtype == 0x10
21 }
22}
23
/// A decoded `PtgExtended` token: the subtype byte plus its raw payload.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PtgExtendedToken {
    /// Byte that immediately follows the `PtgExtended` opcode and selects
    /// the extended token kind.
    pub subtype: u8,
    /// Undecoded payload bytes that follow the subtype byte.
    pub data: Vec<u8>,
}
29
30#[derive(Debug, Clone, PartialEq)]
31pub enum PtgToken {
32 Exp { data: [u8; 4] },
33 Tbl { data: [u8; 4] },
34
35 Add,
36 Sub,
37 Mul,
38 Div,
39 Power,
40 Concat,
41 Lt,
42 Le,
43 Eq,
44 Ge,
45 Gt,
46 Ne,
47 Isect,
48 Union,
49 Range,
50
51 UPlus,
52 UMinus,
53 Percent,
54 Paren,
55 MissArg,
56
57 Str(String),
58 Extended(PtgExtendedToken),
59 Attr(PtgAttrToken),
60 Err(u8),
61 Bool(bool),
62 Int(u16),
63 Num(f64),
64
65 Array { class: PtgClass, data: [u8; 14] },
66 Func { class: PtgClass, iftab: u16 },
67 FuncVar { class: PtgClass, argc: u8, iftab: u16 },
68 Name { class: PtgClass, name_index: u32 },
69 Ref {
70 class: PtgClass,
71 row: u32,
72 col: u16,
73 },
74 Area {
75 class: PtgClass,
76 first_row: u32,
77 last_row: u32,
78 first_col: u16,
79 last_col: u16,
80 },
81 MemArea {
82 class: PtgClass,
83 reserved: u32,
84 cce: u16,
85 },
86 MemErr {
87 class: PtgClass,
88 reserved: u32,
89 cce: u16,
90 },
91 MemNoMem {
92 class: PtgClass,
93 reserved: u32,
94 cce: u16,
95 },
96 MemFunc {
97 class: PtgClass,
98 rgce: Vec<u8>,
99 },
100 RefN {
101 class: PtgClass,
102 row: u32,
103 col: u16,
104 },
105 AreaN {
106 class: PtgClass,
107 first_row: u32,
108 last_row: u32,
109 first_col: u16,
110 last_col: u16,
111 },
112 RefErr {
113 class: PtgClass,
114 data: [u8; 6],
115 },
116 AreaErr {
117 class: PtgClass,
118 data: [u8; 12],
119 },
120 NameX {
121 class: PtgClass,
122 ixti: u16,
123 name_index: u32,
124 },
125 Ref3d {
126 class: PtgClass,
127 ixti: u16,
128 row: u32,
129 col: u16,
130 },
131 Area3d {
132 class: PtgClass,
133 ixti: u16,
134 first_row: u32,
135 last_row: u32,
136 first_col: u16,
137 last_col: u16,
138 },
139 RefErr3d {
140 class: PtgClass,
141 ixti: u16,
142 data: [u8; 6],
143 },
144 AreaErr3d {
145 class: PtgClass,
146 ixti: u16,
147 data: [u8; 12],
148 },
149}
150
151#[derive(Debug, Error, Clone, PartialEq, Eq)]
152pub enum PtgParseError {
153 #[error(
154 "unexpected EOF while reading {context}: need {needed} byte(s), have {remaining} byte(s)"
155 )]
156 UnexpectedEof {
157 context: &'static str,
158 needed: usize,
159 remaining: usize,
160 },
161
162 #[error("unknown Ptg opcode 0x{opcode:02X} at byte offset {offset}")]
163 UnknownOpcode { opcode: u8, offset: usize },
164
165 #[error("unknown PtgAttr subtype 0x{subtype:02X} at byte offset {offset}")]
166 UnknownAttrSubtype { subtype: u8, offset: usize },
167
168 #[error("unknown PtgExtended subtype 0x{subtype:02X} at byte offset {offset}")]
169 UnknownExtendedSubtype { subtype: u8, offset: usize },
170
171 #[error("invalid UTF-16 string payload: {reason}")]
172 InvalidUtf16 { reason: String },
173
174 #[error("string payload length overflow")]
175 LengthOverflow,
176}
177
178pub fn parse_ptg_tokens(rgce: &[u8]) -> Result<Vec<PtgToken>, PtgParseError> {
179 let mut parser = PtgParser { rgce, pos: 0 };
180 let mut tokens = Vec::new();
181
182 while !parser.is_eof() {
183 tokens.push(parser.parse_next()?);
184 }
185
186 Ok(tokens)
187}
188
/// Cursor over a borrowed token byte stream.
struct PtgParser<'a> {
    /// The complete input being decoded.
    rgce: &'a [u8],
    /// Index into `rgce` of the next unread byte.
    pos: usize,
}
193
194impl<'a> PtgParser<'a> {
195 fn is_eof(&self) -> bool {
196 self.pos >= self.rgce.len()
197 }
198
199 fn parse_next(&mut self) -> Result<PtgToken, PtgParseError> {
200 let opcode_offset = self.pos;
201 let opcode = self.read_u8("Ptg opcode")?;
202
203 let token = match opcode {
204 0x01 => PtgToken::Exp {
205 data: self.read_array("PtgExp data")?,
206 },
207 0x02 => PtgToken::Tbl {
208 data: self.read_array("PtgTbl data")?,
209 },
210
211 0x03 => PtgToken::Add,
212 0x04 => PtgToken::Sub,
213 0x05 => PtgToken::Mul,
214 0x06 => PtgToken::Div,
215 0x07 => PtgToken::Power,
216 0x08 => PtgToken::Concat,
217 0x09 => PtgToken::Lt,
218 0x0A => PtgToken::Le,
219 0x0B => PtgToken::Eq,
220 0x0C => PtgToken::Ge,
221 0x0D => PtgToken::Gt,
222 0x0E => PtgToken::Ne,
223 0x0F => PtgToken::Isect,
224 0x10 => PtgToken::Union,
225 0x11 => PtgToken::Range,
226
227 0x12 => PtgToken::UPlus,
228 0x13 => PtgToken::UMinus,
229 0x14 => PtgToken::Percent,
230 0x15 => PtgToken::Paren,
231 0x16 => PtgToken::MissArg,
232
233 0x17 => {
234 let cch = self.read_u16("PtgStr.cch")? as usize;
235 let utf16_len = cch.checked_mul(2).ok_or(PtgParseError::LengthOverflow)?;
236 let payload = self.read_slice(utf16_len, "PtgStr UTF-16 payload")?;
237 let text = decode_utf16le(payload)?;
238 PtgToken::Str(text)
239 }
240 0x18 => {
241 let subtype_offset = self.pos;
242 let subtype = self.read_u8("PtgExtended subtype")?;
243 let payload_len = match subtype {
244 0x19 => 12,
245 0x1D => 4,
246 _ => {
247 return Err(PtgParseError::UnknownExtendedSubtype {
248 subtype,
249 offset: subtype_offset,
250 })
251 }
252 };
253 let data = self.read_vec(payload_len, "PtgExtended payload")?;
254 PtgToken::Extended(PtgExtendedToken { subtype, data })
255 }
256 0x19 => {
257 let subtype_offset = self.pos;
258 let subtype = self.read_u8("PtgAttr subtype")?;
259 let payload_len = match subtype {
260 0x01 | 0x02 | 0x08 | 0x10 | 0x20 | 0x21 | 0x40 | 0x41 | 0x80 => 2,
261 0x04 => 10,
262 _ => {
263 return Err(PtgParseError::UnknownAttrSubtype {
264 subtype,
265 offset: subtype_offset,
266 })
267 }
268 };
269 let data = self.read_vec(payload_len, "PtgAttr payload")?;
270 PtgToken::Attr(PtgAttrToken { subtype, data })
271 }
272
273 0x1C => PtgToken::Err(self.read_u8("PtgErr code")?),
274 0x1D => PtgToken::Bool(self.read_u8("PtgBool value")? != 0),
275 0x1E => PtgToken::Int(self.read_u16("PtgInt value")?),
276 0x1F => PtgToken::Num(self.read_f64("PtgNum value")?),
277
278 op if matches_class(op, 0x20, 0x40, 0x60) => {
279 let class = class_from_opcode(op, 0x20, 0x40, 0x60, opcode_offset)?;
280 PtgToken::Array {
281 class,
282 data: self.read_array("PtgArray payload")?,
283 }
284 }
285 op if matches_class(op, 0x21, 0x41, 0x61) => {
286 let class = class_from_opcode(op, 0x21, 0x41, 0x61, opcode_offset)?;
287 PtgToken::Func {
288 class,
289 iftab: self.read_u16("PtgFunc iftab")?,
290 }
291 }
292 op if matches_class(op, 0x22, 0x42, 0x62) => {
293 let class = class_from_opcode(op, 0x22, 0x42, 0x62, opcode_offset)?;
294 let argc = self.read_u8("PtgFuncVar argc")?;
295 let iftab = self.read_u16("PtgFuncVar iftab")?;
296 PtgToken::FuncVar { class, argc, iftab }
297 }
298 op if matches_class(op, 0x23, 0x43, 0x63) => {
299 let class = class_from_opcode(op, 0x23, 0x43, 0x63, opcode_offset)?;
300 PtgToken::Name {
301 class,
302 name_index: self.read_u32("PtgName index")?,
303 }
304 }
305 op if matches_class(op, 0x24, 0x44, 0x64) => {
306 let class = class_from_opcode(op, 0x24, 0x44, 0x64, opcode_offset)?;
307 let row = self.read_u32("PtgRef row")?;
308 let col = self.read_u16("PtgRef col")?;
309 PtgToken::Ref { class, row, col }
310 }
311 op if matches_class(op, 0x25, 0x45, 0x65) => {
312 let class = class_from_opcode(op, 0x25, 0x45, 0x65, opcode_offset)?;
313 let first_row = self.read_u32("PtgArea first_row")?;
314 let last_row = self.read_u32("PtgArea last_row")?;
315 let first_col = self.read_u16("PtgArea first_col")?;
316 let last_col = self.read_u16("PtgArea last_col")?;
317 PtgToken::Area {
318 class,
319 first_row,
320 last_row,
321 first_col,
322 last_col,
323 }
324 }
325 op if matches_class(op, 0x26, 0x46, 0x66) => {
326 let class = class_from_opcode(op, 0x26, 0x46, 0x66, opcode_offset)?;
327 let reserved = self.read_u32("PtgMemArea reserved")?;
328 let cce = self.read_u16("PtgMemArea cce")?;
329 PtgToken::MemArea {
330 class,
331 reserved,
332 cce,
333 }
334 }
335 op if matches_class(op, 0x27, 0x47, 0x67) => {
336 let class = class_from_opcode(op, 0x27, 0x47, 0x67, opcode_offset)?;
337 let reserved = self.read_u32("PtgMemErr reserved")?;
338 let cce = self.read_u16("PtgMemErr cce")?;
339 PtgToken::MemErr {
340 class,
341 reserved,
342 cce,
343 }
344 }
345 op if matches_class(op, 0x28, 0x48, 0x68) => {
346 let class = class_from_opcode(op, 0x28, 0x48, 0x68, opcode_offset)?;
347 let reserved = self.read_u32("PtgMemNoMem reserved")?;
348 let cce = self.read_u16("PtgMemNoMem cce")?;
349 PtgToken::MemNoMem {
350 class,
351 reserved,
352 cce,
353 }
354 }
355 op if matches_class(op, 0x29, 0x49, 0x69) => {
356 let class = class_from_opcode(op, 0x29, 0x49, 0x69, opcode_offset)?;
357 let cce = usize::from(self.read_u16("PtgMemFunc cce")?);
358 let rgce = self.read_vec(cce, "PtgMemFunc nested rgce")?;
359 PtgToken::MemFunc { class, rgce }
360 }
361
362 op if matches_class(op, 0x2A, 0x4A, 0x6A) => {
363 let class = class_from_opcode(op, 0x2A, 0x4A, 0x6A, opcode_offset)?;
364 PtgToken::RefErr {
365 class,
366 data: self.read_array("PtgRefErr payload")?,
367 }
368 }
369 op if matches_class(op, 0x2B, 0x4B, 0x6B) => {
370 let class = class_from_opcode(op, 0x2B, 0x4B, 0x6B, opcode_offset)?;
371 PtgToken::AreaErr {
372 class,
373 data: self.read_array("PtgAreaErr payload")?,
374 }
375 }
376
377 op if matches_class(op, 0x2C, 0x4C, 0x6C) => {
378 let class = class_from_opcode(op, 0x2C, 0x4C, 0x6C, opcode_offset)?;
379 let row = self.read_u32("PtgRefN row")?;
380 let col = self.read_u16("PtgRefN col")?;
381 PtgToken::RefN { class, row, col }
382 }
383 op if matches_class(op, 0x2D, 0x4D, 0x6D) => {
384 let class = class_from_opcode(op, 0x2D, 0x4D, 0x6D, opcode_offset)?;
385 let first_row = self.read_u32("PtgAreaN first_row")?;
386 let last_row = self.read_u32("PtgAreaN last_row")?;
387 let first_col = self.read_u16("PtgAreaN first_col")?;
388 let last_col = self.read_u16("PtgAreaN last_col")?;
389 PtgToken::AreaN {
390 class,
391 first_row,
392 last_row,
393 first_col,
394 last_col,
395 }
396 }
397
398 op if matches_class(op, 0x39, 0x59, 0x79) => {
399 let class = class_from_opcode(op, 0x39, 0x59, 0x79, opcode_offset)?;
400 let ixti = self.read_u16("PtgNameX ixti")?;
401 let name_index = self.read_u32("PtgNameX index")?;
402 PtgToken::NameX {
403 class,
404 ixti,
405 name_index,
406 }
407 }
408 op if matches_class(op, 0x3A, 0x5A, 0x7A) => {
409 let class = class_from_opcode(op, 0x3A, 0x5A, 0x7A, opcode_offset)?;
410 let ixti = self.read_u16("PtgRef3d ixti")?;
411 let row = self.read_u32("PtgRef3d row")?;
412 let col = self.read_u16("PtgRef3d col")?;
413 PtgToken::Ref3d {
414 class,
415 ixti,
416 row,
417 col,
418 }
419 }
420 op if matches_class(op, 0x3B, 0x5B, 0x7B) => {
421 let class = class_from_opcode(op, 0x3B, 0x5B, 0x7B, opcode_offset)?;
422 let ixti = self.read_u16("PtgArea3d ixti")?;
423 let first_row = self.read_u32("PtgArea3d first_row")?;
424 let last_row = self.read_u32("PtgArea3d last_row")?;
425 let first_col = self.read_u16("PtgArea3d first_col")?;
426 let last_col = self.read_u16("PtgArea3d last_col")?;
427 PtgToken::Area3d {
428 class,
429 ixti,
430 first_row,
431 last_row,
432 first_col,
433 last_col,
434 }
435 }
436 op if matches_class(op, 0x3C, 0x5C, 0x7C) => {
437 let class = class_from_opcode(op, 0x3C, 0x5C, 0x7C, opcode_offset)?;
438 let ixti = self.read_u16("PtgRefErr3d ixti")?;
439 let data = self.read_array("PtgRefErr3d payload")?;
440 PtgToken::RefErr3d { class, ixti, data }
441 }
442 op if matches_class(op, 0x3D, 0x5D, 0x7D) => {
443 let class = class_from_opcode(op, 0x3D, 0x5D, 0x7D, opcode_offset)?;
444 let ixti = self.read_u16("PtgAreaErr3d ixti")?;
445 let data = self.read_array("PtgAreaErr3d payload")?;
446 PtgToken::AreaErr3d { class, ixti, data }
447 }
448
449 _ => {
450 return Err(PtgParseError::UnknownOpcode {
451 opcode,
452 offset: opcode_offset,
453 })
454 }
455 };
456
457 Ok(token)
458 }
459
460 fn read_u8(&mut self, context: &'static str) -> Result<u8, PtgParseError> {
461 let bytes = self.read_slice(1, context)?;
462 Ok(bytes[0])
463 }
464
465 fn read_u16(&mut self, context: &'static str) -> Result<u16, PtgParseError> {
466 let bytes = self.read_slice(2, context)?;
467 Ok(u16::from_le_bytes([bytes[0], bytes[1]]))
468 }
469
470 fn read_u32(&mut self, context: &'static str) -> Result<u32, PtgParseError> {
471 let bytes = self.read_slice(4, context)?;
472 Ok(u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
473 }
474
475 fn read_f64(&mut self, context: &'static str) -> Result<f64, PtgParseError> {
476 let bytes = self.read_slice(8, context)?;
477 Ok(f64::from_le_bytes([
478 bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
479 ]))
480 }
481
482 fn read_vec(&mut self, len: usize, context: &'static str) -> Result<Vec<u8>, PtgParseError> {
483 Ok(self.read_slice(len, context)?.to_vec())
484 }
485
486 fn read_array<const N: usize>(
487 &mut self,
488 context: &'static str,
489 ) -> Result<[u8; N], PtgParseError> {
490 let bytes = self.read_slice(N, context)?;
491 let mut out = [0u8; N];
492 out.copy_from_slice(bytes);
493 Ok(out)
494 }
495
496 fn read_slice(&mut self, len: usize, context: &'static str) -> Result<&'a [u8], PtgParseError> {
497 let remaining = self.rgce.len().saturating_sub(self.pos);
498 if remaining < len {
499 return Err(PtgParseError::UnexpectedEof {
500 context,
501 needed: len,
502 remaining,
503 });
504 }
505
506 let start = self.pos;
507 let end = start + len;
508 self.pos = end;
509 Ok(&self.rgce[start..end])
510 }
511}
512
513fn decode_utf16le(bytes: &[u8]) -> Result<String, PtgParseError> {
514 if bytes.len() % 2 != 0 {
515 return Err(PtgParseError::InvalidUtf16 {
516 reason: "odd number of UTF-16 bytes".to_string(),
517 });
518 }
519
520 let mut words = Vec::with_capacity(bytes.len() / 2);
521 for chunk in bytes.chunks_exact(2) {
522 words.push(u16::from_le_bytes([chunk[0], chunk[1]]));
523 }
524
525 let mut result = String::new();
526 for item in decode_utf16(words.into_iter()) {
527 match item {
528 Ok(ch) => result.push(ch),
529 Err(_) => {
530 return Err(PtgParseError::InvalidUtf16 {
531 reason: "invalid UTF-16 surrogate pair".to_string(),
532 })
533 }
534 }
535 }
536
537 Ok(result)
538}
539
/// Returns `true` when `opcode` equals any of the three class-variant opcodes.
fn matches_class(opcode: u8, reference: u8, value: u8, array: u8) -> bool {
    [reference, value, array].contains(&opcode)
}
543
544fn class_from_opcode(
545 opcode: u8,
546 reference: u8,
547 value: u8,
548 array: u8,
549 offset: usize,
550) -> Result<PtgClass, PtgParseError> {
551 if opcode == reference {
552 return Ok(PtgClass::Reference);
553 }
554 if opcode == value {
555 return Ok(PtgClass::Value);
556 }
557 if opcode == array {
558 return Ok(PtgClass::Array);
559 }
560
561 Err(PtgParseError::UnknownOpcode { opcode, offset })
562}
563
#[cfg(test)]
mod tests {
    use super::*;

    /// Two 16-bit integer operands followed by an add operator decode in order.
    #[test]
    fn parse_simple_arithmetic_tokens() {
        // 0x1E carries a little-endian u16 operand; 0x03 has no payload.
        let formula_bytes = [0x1E, 0x01, 0x00, 0x1E, 0x02, 0x00, 0x03];

        let parsed = parse_ptg_tokens(&formula_bytes).expect("arithmetic rgce should parse");

        assert_eq!(
            parsed,
            vec![PtgToken::Int(1), PtgToken::Int(2), PtgToken::Add]
        );
    }

    /// Snapshot of a cell reference, an area reference and a fixed-arity
    /// function token, all encoded with their reference-class opcodes.
    #[test]
    fn parse_core_reference_tokens_snapshot() {
        let formula_bytes = [
            // 0x24: u32 row, u16 column field.
            0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0,
            // 0x25: u32 first row, u32 last row, u16 first column, u16 last column.
            0x25, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x01, 0xC0,
            // 0x21: u16 iftab.
            0x21, 0x04, 0x00,
        ];

        let parsed = parse_ptg_tokens(&formula_bytes).expect("reference tokens should parse");

        insta::assert_debug_snapshot!(parsed, @r###"
        [
            Ref {
                class: Reference,
                row: 0,
                col: 49152,
            },
            Area {
                class: Reference,
                first_row: 0,
                last_row: 9,
                first_col: 49152,
                last_col: 49153,
            },
            Func {
                class: Reference,
                iftab: 4,
            },
        ]
        "###);
    }
}