Skip to main content

xlsbye_formula/
decompile.rs

1use thiserror::Error;
2use xlsbye_core::types::CellError;
3
4use crate::ftab::{function_argc, function_name};
5use crate::names::{quote_sheet_name, resolve_defined_name, resolve_external_name};
6use crate::ptg::{parse_ptg_tokens, PtgParseError, PtgToken};
7use crate::refs::{
8    format_area_ref_from_encoded, format_cell_ref_from_encoded, format_shared_area_ref,
9    format_shared_cell_ref,
10};
11use crate::stack::DecompileStack;
12
13#[derive(Debug, Clone, Copy, PartialEq, Eq)]
14struct FormulaContext {
15    base_row: u32,
16    base_col: u32,
17}
18
19#[derive(Debug, Error, Clone, PartialEq, Eq)]
20pub enum FormulaError {
21    #[error("failed to parse Ptg token stream: {0}")]
22    Parse(#[from] PtgParseError),
23
24    #[error("formula stack underflow: need {needed} operand(s), have {available}")]
25    StackUnderflow { needed: usize, available: usize },
26
27    #[error("formula stack must contain exactly one expression at end, found {len}")]
28    InvalidFinalStack { len: usize },
29
30    #[error("unknown function index: {iftab}")]
31    UnknownFunctionIndex { iftab: u16 },
32
33    #[error("unknown error literal code: 0x{code:02X}")]
34    UnknownErrorCode { code: u8 },
35
36    #[error("unknown name index: {index}")]
37    UnknownNameIndex { index: u32 },
38
39    #[error("invalid sheet index: {index}")]
40    InvalidSheetIndex { index: u16 },
41}
42
43pub fn decompile_formula(
44    rgce: &[u8],
45    sheets: &[String],
46    names: &[(String, String)],
47) -> Result<String, FormulaError> {
48    decompile_formula_with_context(rgce, sheets, names, None)
49}
50
51pub fn decompile_shared_formula(
52    rgce: &[u8],
53    sheets: &[String],
54    names: &[(String, String)],
55    base_row: u32,
56    base_col: u32,
57) -> Result<String, FormulaError> {
58    decompile_formula_with_context(
59        rgce,
60        sheets,
61        names,
62        Some(FormulaContext { base_row, base_col }),
63    )
64}
65
66fn decompile_formula_with_context(
67    rgce: &[u8],
68    sheets: &[String],
69    names: &[(String, String)],
70    context: Option<FormulaContext>,
71) -> Result<String, FormulaError> {
72    if rgce.is_empty() {
73        return Ok(String::new());
74    }
75
76    let tokens = parse_ptg_tokens(rgce)?;
77    let mut stack = DecompileStack::default();
78
79    for token in tokens {
80        match token {
81            PtgToken::Exp { .. } | PtgToken::Tbl { .. } => {
82                stack.push(String::new());
83            }
84
85            PtgToken::Add => apply_binary_operator(&mut stack, "+")?,
86            PtgToken::Sub => apply_binary_operator(&mut stack, "-")?,
87            PtgToken::Mul => apply_binary_operator(&mut stack, "*")?,
88            PtgToken::Div => apply_binary_operator(&mut stack, "/")?,
89            PtgToken::Power => apply_binary_operator(&mut stack, "^")?,
90            PtgToken::Concat => apply_binary_operator(&mut stack, "&")?,
91            PtgToken::Lt => apply_binary_operator(&mut stack, "<")?,
92            PtgToken::Le => apply_binary_operator(&mut stack, "<=")?,
93            PtgToken::Eq => apply_binary_operator(&mut stack, "=")?,
94            PtgToken::Ge => apply_binary_operator(&mut stack, ">=")?,
95            PtgToken::Gt => apply_binary_operator(&mut stack, ">")?,
96            PtgToken::Ne => apply_binary_operator(&mut stack, "<>")?,
97            PtgToken::Isect => apply_binary_operator(&mut stack, " ")?,
98            PtgToken::Union => apply_binary_operator(&mut stack, ",")?,
99            PtgToken::Range => apply_binary_operator(&mut stack, ":")?,
100
101            PtgToken::UPlus => stack.apply_unary_prefix("+")?,
102            PtgToken::UMinus => stack.apply_unary_prefix("-")?,
103            PtgToken::Percent => stack.apply_unary_suffix("%")?,
104            PtgToken::Paren => stack.wrap_top("(", ")")?,
105            PtgToken::MissArg => stack.push(String::new()),
106
107            PtgToken::Str(value) => stack.push(format_string_literal(&value)),
108            PtgToken::Extended(_) => stack.push(String::new()),
109            PtgToken::Attr(attr) => {
110                if attr.is_sum() {
111                    let arg = stack.pop()?;
112                    stack.push(format!("SUM({arg})"));
113                }
114            }
115            PtgToken::Err(code) => {
116                stack.push(format_error_literal(code)?);
117            }
118            PtgToken::Bool(value) => {
119                stack.push(if value { "TRUE" } else { "FALSE" });
120            }
121            PtgToken::Int(value) => {
122                stack.push(value.to_string());
123            }
124            PtgToken::Num(value) => {
125                stack.push(value.to_string());
126            }
127
128            PtgToken::Array { .. } => stack.push("{...}"),
129            PtgToken::Func { iftab, .. } => {
130                let name = function_name(iftab)
131                    .ok_or(FormulaError::UnknownFunctionIndex { iftab })?
132                    .to_string();
133                let argc = fixed_argc(iftab)?;
134                push_function_call(&mut stack, &name, argc)?;
135            }
136            PtgToken::FuncVar { argc, iftab, .. } => {
137                let name = function_name(iftab)
138                    .ok_or(FormulaError::UnknownFunctionIndex { iftab })?
139                    .to_string();
140                push_function_call(&mut stack, &name, usize::from(argc))?;
141            }
142            PtgToken::Name { name_index, .. } => {
143                let name = resolve_defined_name(names, name_index)
144                    .ok_or(FormulaError::UnknownNameIndex { index: name_index })?;
145                stack.push(name.to_string());
146            }
147            PtgToken::Ref { row, col, .. } => {
148                stack.push(format_cell_ref_from_encoded(row, col));
149            }
150            PtgToken::RefN { row, col, .. } => {
151                if let Some(context) = context {
152                    stack.push(format_shared_cell_ref(
153                        context.base_row,
154                        context.base_col,
155                        row,
156                        col,
157                    ));
158                } else {
159                    stack.push(format_cell_ref_from_encoded(row, col));
160                }
161            }
162            PtgToken::Area {
163                first_row,
164                last_row,
165                first_col,
166                last_col,
167                ..
168            } => {
169                stack.push(format_area_ref_from_encoded(
170                    first_row, first_col, last_row, last_col,
171                ));
172            }
173            PtgToken::AreaN {
174                first_row,
175                last_row,
176                first_col,
177                last_col,
178                ..
179            } => {
180                if let Some(context) = context {
181                    stack.push(format_shared_area_ref(
182                        context.base_row,
183                        context.base_col,
184                        first_row,
185                        first_col,
186                        last_row,
187                        last_col,
188                    ));
189                } else {
190                    stack.push(format_area_ref_from_encoded(
191                        first_row, first_col, last_row, last_col,
192                    ));
193                }
194            }
195            PtgToken::MemArea { .. } | PtgToken::MemErr { .. } | PtgToken::MemNoMem { .. } => {}
196            PtgToken::MemFunc { rgce, .. } => {
197                let nested = decompile_formula_with_context(&rgce, sheets, names, context)?;
198                stack.push(nested);
199            }
200
201            PtgToken::RefErr { .. } | PtgToken::AreaErr { .. } => stack.push("#REF!"),
202
203            PtgToken::NameX {
204                ixti, name_index, ..
205            } => {
206                let resolved = resolve_external_name(sheets, names, ixti, name_index)
207                    .ok_or(FormulaError::UnknownNameIndex { index: name_index })?;
208                stack.push(resolved);
209            }
210            PtgToken::Ref3d { ixti, row, col, .. } => {
211                let sheet = sheets
212                    .get(usize::from(ixti))
213                    .ok_or(FormulaError::InvalidSheetIndex { index: ixti })?;
214                let cell_ref = format_cell_ref_from_encoded(row, col);
215                stack.push(format!("{}!{cell_ref}", quote_sheet_name(sheet)));
216            }
217            PtgToken::Area3d {
218                ixti,
219                first_row,
220                last_row,
221                first_col,
222                last_col,
223                ..
224            } => {
225                let sheet = sheets
226                    .get(usize::from(ixti))
227                    .ok_or(FormulaError::InvalidSheetIndex { index: ixti })?;
228                let area_ref =
229                    format_area_ref_from_encoded(first_row, first_col, last_row, last_col);
230                stack.push(format!("{}!{area_ref}", quote_sheet_name(sheet)));
231            }
232            PtgToken::RefErr3d { ixti, .. } | PtgToken::AreaErr3d { ixti, .. } => {
233                let sheet = sheets
234                    .get(usize::from(ixti))
235                    .ok_or(FormulaError::InvalidSheetIndex { index: ixti })?;
236                stack.push(format!("{}!#REF!", quote_sheet_name(sheet)));
237            }
238        }
239    }
240
241    stack.finish()
242}
243
244fn apply_binary_operator(stack: &mut DecompileStack, operator: &str) -> Result<(), FormulaError> {
245    let (left, right) = stack.pop2()?;
246    stack.push(format!("{left}{operator}{right}"));
247    Ok(())
248}
249
250fn push_function_call(
251    stack: &mut DecompileStack,
252    function_name: &str,
253    argc: usize,
254) -> Result<(), FormulaError> {
255    let args = stack.pop_n(argc)?;
256    let joined = args.join(",");
257    stack.push(format!("{function_name}({joined})"));
258    Ok(())
259}
260
261fn fixed_argc(iftab: u16) -> Result<usize, FormulaError> {
262    let argc = function_argc(iftab).ok_or(FormulaError::UnknownFunctionIndex { iftab })?;
263
264    if argc >= 128 {
265        return Ok(1);
266    }
267
268    Ok(usize::from(argc))
269}
270
271fn format_string_literal(value: &str) -> String {
272    let escaped = value.replace('"', "\"\"");
273    format!("\"{escaped}\"")
274}
275
276fn format_error_literal(code: u8) -> Result<&'static str, FormulaError> {
277    CellError::from_code(code)
278        .map(|err| err.as_str())
279        .ok_or(FormulaError::UnknownErrorCode { code })
280}
281
282#[cfg(test)]
283mod tests {
284    use super::*;
285
286    fn push_u16(buf: &mut Vec<u8>, value: u16) {
287        buf.extend(value.to_le_bytes());
288    }
289
290    fn push_u32(buf: &mut Vec<u8>, value: u32) {
291        buf.extend(value.to_le_bytes());
292    }
293
294    fn encode_col(col: u16, row_abs: bool, col_abs: bool) -> u16 {
295        let mut encoded = col & 0x3FFF;
296        if !col_abs {
297            encoded |= 0x4000;
298        }
299        if !row_abs {
300            encoded |= 0x8000;
301        }
302        encoded
303    }
304
305    #[test]
306    fn decompile_formula_snapshots() {
307        let sheets = vec!["Sheet1".to_string(), "Data Sheet".to_string()];
308        let names = vec![("LocalName".to_string(), "Sheet1!A1".to_string())];
309
310        let mut simple = vec![0x1E];
311        push_u16(&mut simple, 1);
312        simple.push(0x1E);
313        push_u16(&mut simple, 2);
314        simple.push(0x03);
315
316        let mut sum_area = vec![0x25];
317        push_u32(&mut sum_area, 0);
318        push_u32(&mut sum_area, 9);
319        push_u16(&mut sum_area, encode_col(0, false, false));
320        push_u16(&mut sum_area, encode_col(1, false, false));
321        sum_area.push(0x21);
322        push_u16(&mut sum_area, 4);
323
324        let mut nested_if = vec![0x24];
325        push_u32(&mut nested_if, 0);
326        push_u16(&mut nested_if, encode_col(0, false, false));
327        nested_if.push(0x1E);
328        push_u16(&mut nested_if, 5);
329        nested_if.push(0x0D);
330        nested_if.push(0x25);
331        push_u32(&mut nested_if, 0);
332        push_u32(&mut nested_if, 9);
333        push_u16(&mut nested_if, encode_col(1, false, false));
334        push_u16(&mut nested_if, encode_col(1, false, false));
335        nested_if.push(0x21);
336        push_u16(&mut nested_if, 4);
337        nested_if.push(0x1E);
338        push_u16(&mut nested_if, 0);
339        nested_if.push(0x21);
340        push_u16(&mut nested_if, 1);
341
342        let mut ref3d = vec![0x3A];
343        push_u16(&mut ref3d, 0);
344        push_u32(&mut ref3d, 0);
345        push_u16(&mut ref3d, encode_col(0, false, false));
346
347        let mut strings_bools_errors = vec![0x17];
348        push_u16(&mut strings_bools_errors, 5);
349        strings_bools_errors.extend([b'H', 0, b'e', 0, b'l', 0, b'l', 0, b'o', 0]);
350        strings_bools_errors.push(0x1D);
351        strings_bools_errors.push(1);
352        strings_bools_errors.push(0x08);
353        strings_bools_errors.push(0x1C);
354        strings_bools_errors.push(0x17);
355        strings_bools_errors.push(0x08);
356
357        let mut unary = vec![0x1E];
358        push_u16(&mut unary, 50);
359        unary.push(0x14);
360        unary.push(0x13);
361
362        let mut missing = vec![0x16, 0x1E];
363        push_u16(&mut missing, 1);
364        missing.push(0x22);
365        missing.push(2);
366        push_u16(&mut missing, 4);
367
368        let mut named = vec![0x23];
369        push_u32(&mut named, 1);
370
371        let mut named_x = vec![0x39];
372        push_u16(&mut named_x, 1);
373        push_u32(&mut named_x, 1);
374
375        let output = [
376            (
377                "simple_arithmetic",
378                decompile_formula(&simple, &sheets, &names).expect("simple arithmetic formula"),
379            ),
380            (
381                "sum_area",
382                decompile_formula(&sum_area, &sheets, &names).expect("sum area formula"),
383            ),
384            (
385                "nested_if",
386                decompile_formula(&nested_if, &sheets, &names).expect("nested if formula"),
387            ),
388            (
389                "ref_3d",
390                decompile_formula(&ref3d, &sheets, &names).expect("3d ref formula"),
391            ),
392            (
393                "string_bool_error",
394                decompile_formula(&strings_bools_errors, &sheets, &names)
395                    .expect("string bool error formula"),
396            ),
397            (
398                "unary_minus_percent",
399                decompile_formula(&unary, &sheets, &names).expect("unary formula"),
400            ),
401            (
402                "missing_arg",
403                decompile_formula(&missing, &sheets, &names).expect("missing arg formula"),
404            ),
405            (
406                "name",
407                decompile_formula(&named, &sheets, &names).expect("name formula"),
408            ),
409            (
410                "namex",
411                decompile_formula(&named_x, &sheets, &names).expect("namex formula"),
412            ),
413        ];
414
415        let mut snapshot = String::new();
416        for (label, formula) in output {
417            snapshot.push_str(label);
418            snapshot.push_str(": ");
419            snapshot.push_str(&formula);
420            snapshot.push('\n');
421        }
422
423        insta::assert_snapshot!(snapshot, @r###"
424        simple_arithmetic: 1+2
425        sum_area: SUM(A1:B10)
426        nested_if: IF(A1>5,SUM(B1:B10),0)
427        ref_3d: Sheet1!A1
428        string_bool_error: "Hello"&TRUE&#REF!
429        unary_minus_percent: -50%
430        missing_arg: SUM(,1)
431        name: LocalName
432        namex: 'Data Sheet'!LocalName
433        "###);
434    }
435}