datex_core/decompiler/
mod.rs

1use std::collections::HashMap; // FIXME #222 no-std
2use std::collections::HashSet;
3use std::fmt::Write;
4use std::io::Cursor;
5// FIXME #223 no-std
6
7
8use crate::compiler::{
9    compile_template_with_refs, CompileOptions,
10};
11use crate::values::core_values::decimal::utils::decimal_to_string;
12use crate::values::value_container::ValueContainer;
13use crate::global::protocol_structures::instructions::{
14    DecimalData, Float32Data, Float64Data, FloatAsInt16Data, FloatAsInt32Data,
15    Instruction, Int16Data, Int32Data, Int64Data, Int8Data, ShortTextData,
16    TextData,
17};
18use crate::parser::body;
19use crate::parser::body::DXBParserError;
20use syntect::easy::HighlightLines;
21use syntect::highlighting::{Style, Theme, ThemeSet};
22use syntect::parsing::{SyntaxDefinition, SyntaxSetBuilder};
23use syntect::util::{as_24_bit_terminal_escaped, LinesWithEndings};
24
25/// Decompiles a DXB bytecode body into a human-readable string representation.
26pub fn decompile_body(
27    dxb_body: &[u8],
28    options: DecompileOptions,
29) -> Result<String, DXBParserError> {
30    let mut initial_state = DecompilerState {
31        dxb_body,
32        options,
33
34        scopes: vec![ScopeState {
35            scope_type: (ScopeType::default(), true),
36            ..ScopeState::default()
37        }],
38
39        current_label: 0,
40        labels: HashMap::new(),
41        inserted_labels: HashSet::new(),
42        variables: HashMap::new(),
43    };
44
45    decompile_loop(&mut initial_state)
46}
47
48/// Decompiles a single DATEX value into a human-readable string representation.
49pub fn decompile_value(
50    value: &ValueContainer,
51    options: DecompileOptions,
52) -> String {
53    let (compiled_value, _) =
54        compile_template_with_refs("?", &[value], CompileOptions::default())
55            .unwrap();
56    decompile_body(&compiled_value, options).unwrap()
57}
58
/// Converts `n` into a lowercase base-26 label in which `a` is the digit 0
/// (`0 -> "a"`, `1 -> "b"`, `25 -> "z"`, `26 -> "ba"`).
///
/// Zero and negative inputs all produce `"a"`.
fn int_to_label(n: i32) -> String {
    if n <= 0 {
        return "a".to_string();
    }

    // collect the digits least-significant first, then reverse them
    let mut digits = Vec::new();
    let mut rest = n;
    while rest > 0 {
        digits.push(char::from((rest % 26) as u8 + b'a'));
        rest /= 26;
    }

    digits.into_iter().rev().collect()
}
82
/// Options controlling how bytecode is rendered by the decompiler.
#[derive(Debug, Clone, Default)]
pub struct DecompileOptions {
    /// insert extra whitespace (after commas and colons, line breaks after statements)
    pub formatted: bool,
    /// apply terminal syntax highlighting to the generated output
    pub colorized: bool,
    /// display slots with generated variable names
    pub resolve_slots: bool,
    /// TODO #224
    /// when set to true, the output is generated as compatible as possible with JSON, e.g. by
    /// always adding double quotes around keys
    pub json_compat: bool,
}

impl DecompileOptions {
    /// Options for JSON-compatible output; every other flag keeps its default
    pub fn json() -> Self {
        Self {
            json_compat: true,
            ..Self::default()
        }
    }

    /// Formats and colorizes the output (also enables slot resolution)
    pub fn colorized() -> Self {
        Self {
            formatted: true,
            colorized: true,
            resolve_slots: true,
            ..Self::default()
        }
    }
}
113
/// Kind of syntactic scope the decompiler is currently writing into.
///
/// The kind selects the opening and closing delimiter of the scope and
/// whether items inside it are separated by commas.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub enum ScopeType {
    /// plain scope, written as `(` ... `)`
    #[default]
    Default,
    /// tuple, written as `(` ... `)`
    Tuple,
    /// array, written as `[` ... `]`
    Array,
    /// object, written as `{` ... `}`
    Object,
    /// right-hand side of a slot assignment, no delimiters
    SlotAssignment,
    /// scope without any delimiters (used for operator operands)
    Transparent,
}
124
125impl ScopeType {
126    pub fn write_start(
127        &self,
128        output: &mut String,
129    ) -> Result<(), DXBParserError> {
130        match self {
131            ScopeType::Default => write!(output, "(")?,
132            ScopeType::Tuple => write!(output, "(")?,
133            ScopeType::Array => write!(output, "[")?,
134            ScopeType::Object => write!(output, "{{")?,
135            ScopeType::SlotAssignment => {
136                // do nothing, slot assignment does not have a start
137            }
138            ScopeType::Transparent => {}
139        }
140        Ok(())
141    }
142    pub fn write_end(&self, output: &mut String) -> Result<(), DXBParserError> {
143        match self {
144            ScopeType::Default => write!(output, ")")?,
145            ScopeType::Tuple => write!(output, ")")?,
146            ScopeType::Array => write!(output, "]")?,
147            ScopeType::Object => write!(output, "}}")?,
148            ScopeType::SlotAssignment => {
149                // do nothing, slot assignment does not have an end
150            }
151            ScopeType::Transparent => {}
152        }
153        Ok(())
154    }
155}
156
157#[derive(Debug, Clone, Default)]
158struct ScopeState {
159    /// true if this is the outer scope (default scope)
160    is_outer_scope: bool,
161    // TODO #225: use BinaryOperator instead of Instruction
162    active_operator: Option<(Instruction, bool)>,
163    scope_type: (ScopeType, bool),
164    /// skip inserted comma for next item (already inserted before key)
165    skip_comma_for_next_item: bool,
166    /// set to true if next item is a key (e.g. in object)
167    next_item_is_key: bool,
168    /// set to true if the current active scope should be closed after the next term
169    close_scope_after_term: bool,
170}
171
172impl ScopeState {
173    fn write_start(&self, output: &mut String) -> Result<(), DXBParserError> {
174        self.scope_type.0.write_start(output)
175    }
176    fn write_end(&self, output: &mut String) -> Result<(), DXBParserError> {
177        self.scope_type.0.write_end(output)
178    }
179}
180
181#[derive(Debug, Clone)]
182struct DecompilerState<'a> {
183    // stack of scopes
184    scopes: Vec<ScopeState>,
185
186    // dxb
187    dxb_body: &'a [u8],
188
189    // options
190    options: DecompileOptions,
191
192    // state
193    current_label: i32,
194    labels: HashMap<usize, String>,
195    inserted_labels: HashSet<usize>,
196    variables: HashMap<u16, String>,
197}
198
199impl DecompilerState<'_> {
200    fn get_current_scope(&mut self) -> &mut ScopeState {
201        self.scopes.last_mut().unwrap()
202    }
203    fn new_scope(&mut self, scope_type: ScopeType) {
204        self.scopes.push(ScopeState {
205            scope_type: (scope_type, true),
206            ..ScopeState::default()
207        });
208    }
209    fn close_scope(&mut self) {
210        if !self.scopes.is_empty() {
211            self.scopes.pop();
212        }
213    }
214}
215
216impl DecompilerState<'_> {
217    fn get_insert_label(&mut self, index: usize) -> String {
218        // existing
219        if self.labels.contains_key(&index) {
220            self.labels
221                .get(&index)
222                .unwrap_or(&"?invalid?".to_string())
223                .to_string()
224        }
225        // new
226        else {
227            let name = self.current_label.to_string();
228            self.current_label += 1;
229            self.labels.insert(index, name.clone());
230            name
231        }
232    }
233}
234
235fn decompile_loop(
236    state: &mut DecompilerState,
237) -> Result<String, DXBParserError> {
238    let mut output = String::new();
239
240    let instruction_iterator = body::iterate_instructions(state.dxb_body);
241
242    for instruction in instruction_iterator {
243        let instruction = instruction?;
244        //info!("decompile instruction: {:?}", instruction);
245
246        match instruction {
247            Instruction::Int8(Int8Data(i8)) => {
248                handle_before_term(state, &mut output, true)?;
249                write!(output, "{i8}")?;
250                handle_after_term(state, &mut output, true)?;
251            }
252            Instruction::Int16(Int16Data(i16)) => {
253                handle_before_term(state, &mut output, true)?;
254                write!(output, "{i16}")?;
255                handle_after_term(state, &mut output, true)?;
256            }
257            Instruction::Int32(Int32Data(i32)) => {
258                handle_before_term(state, &mut output, true)?;
259                write!(output, "{i32}")?;
260                handle_after_term(state, &mut output, true)?;
261            }
262            Instruction::Int64(Int64Data(i64)) => {
263                handle_before_term(state, &mut output, true)?;
264                write!(output, "{i64}")?;
265                handle_after_term(state, &mut output, true)?;
266            }
267            Instruction::DecimalF32(Float32Data(f32)) => {
268                handle_before_term(state, &mut output, true)?;
269                write!(
270                    output,
271                    "{}",
272                    decimal_to_string(f32, state.options.json_compat)
273                )?;
274                handle_after_term(state, &mut output, true)?;
275            }
276            Instruction::DecimalF64(Float64Data(f64)) => {
277                handle_before_term(state, &mut output, true)?;
278                write!(
279                    output,
280                    "{}",
281                    decimal_to_string(f64, state.options.json_compat)
282                )?;
283                handle_after_term(state, &mut output, true)?;
284            }
285            Instruction::DecimalAsInt16(FloatAsInt16Data(i16)) => {
286                handle_before_term(state, &mut output, true)?;
287                write!(
288                    output,
289                    "{}",
290                    decimal_to_string(i16 as f32, state.options.json_compat)
291                )?;
292                handle_after_term(state, &mut output, true)?;
293            }
294            Instruction::DecimalAsInt32(FloatAsInt32Data(i32)) => {
295                handle_before_term(state, &mut output, true)?;
296                write!(
297                    output,
298                    "{}",
299                    decimal_to_string(i32 as f32, state.options.json_compat)
300                )?;
301                handle_after_term(state, &mut output, true)?;
302            }
303            Instruction::Decimal(DecimalData(big_decimal)) => {
304                handle_before_term(state, &mut output, true)?;
305                write!(output, "{big_decimal}")?;
306                handle_after_term(state, &mut output, true)?;
307            }
308            Instruction::ShortText(ShortTextData(text)) => {
309                handle_before_term(state, &mut output, true)?;
310                let text = escape_text(&text);
311                write!(output, "\"{text}\"")?;
312                handle_after_term(state, &mut output, true)?;
313            }
314            Instruction::Text(TextData(text)) => {
315                handle_before_term(state, &mut output, true)?;
316                let text = escape_text(&text);
317                write!(output, "\"{text}\"")?;
318                handle_after_term(state, &mut output, true)?;
319            }
320            Instruction::True => {
321                handle_before_term(state, &mut output, false)?;
322                write!(output, "true")?;
323                handle_after_term(state, &mut output, false)?;
324            }
325            Instruction::False => {
326                handle_before_term(state, &mut output, false)?;
327                write!(output, "false")?;
328                handle_after_term(state, &mut output, false)?;
329            }
330            Instruction::Null => {
331                handle_before_term(state, &mut output, false)?;
332                write!(output, "null")?;
333                handle_after_term(state, &mut output, false)?;
334            }
335            Instruction::Endpoint(endpoint) => {
336                handle_before_term(state, &mut output, false)?;
337                write!(output, "{endpoint}")?;
338                handle_after_term(state, &mut output, false)?;
339            }
340            Instruction::ArrayStart => {
341                handle_before_term(state, &mut output, false)?;
342                state.new_scope(ScopeType::Array);
343                state.get_current_scope().write_start(&mut output)?;
344            }
345            Instruction::ObjectStart => {
346                handle_before_term(state, &mut output, false)?;
347                state.new_scope(ScopeType::Object);
348                state.get_current_scope().write_start(&mut output)?;
349            }
350            Instruction::TupleStart => {
351                handle_before_term(state, &mut output, true)?;
352                state.new_scope(ScopeType::Tuple);
353                state.get_current_scope().write_start(&mut output)?;
354            }
355            Instruction::ScopeStart => {
356                handle_before_term(state, &mut output, true)?;
357                state.new_scope(ScopeType::Default);
358                state.get_current_scope().write_start(&mut output)?;
359            }
360            Instruction::ScopeEnd => {
361                handle_scope_close(state, &mut output)?;
362                handle_after_term(state, &mut output, true)?;
363            }
364            Instruction::KeyValueShortText(text_data) => {
365                handle_before_term(state, &mut output, false)?;
366                // prevent redundant comma for value
367                state.get_current_scope().skip_comma_for_next_item = true;
368                write_text_key(
369                    state,
370                    &text_data.0,
371                    &mut output,
372                    state.options.formatted,
373                )?;
374            }
375            Instruction::KeyValueDynamic => {
376                handle_before_term(state, &mut output, false)?;
377                state.get_current_scope().skip_comma_for_next_item = true;
378                state.get_current_scope().next_item_is_key = true;
379            }
380            Instruction::CloseAndStore => {
381                if state.options.formatted {
382                    write!(output, ";\r\n")?;
383                } else {
384                    write!(output, ";")?;
385                }
386            }
387
388            // operations
389            Instruction::Add
390            | Instruction::Subtract
391            | Instruction::Multiply
392            | Instruction::Divide => {
393                handle_before_term(state, &mut output, false)?;
394                state.new_scope(ScopeType::Transparent);
395                state.get_current_scope().active_operator =
396                    Some((instruction, true));
397            }
398
399            // slots
400            Instruction::AllocateSlot(address) => {
401                handle_before_term(state, &mut output, false)?;
402                state.new_scope(ScopeType::SlotAssignment);
403                // if resolve_slots is enabled, write the slot as variable
404                if state.options.resolve_slots {
405                    // TODO #95: generate variable name for slot
406                    write!(output, "#{} := ", address.0)?;
407                } else {
408                    // otherwise just write the slot address
409                    write!(output, "#{} := ", address.0)?;
410                }
411                handle_after_term(state, &mut output, false)?;
412            }
413            Instruction::GetSlot(address) => {
414                handle_before_term(state, &mut output, false)?;
415                // if resolve_slots is enabled, write the slot as variable
416                if state.options.resolve_slots {
417                    // TODO #96: get variable name for slot
418                    write!(output, "#{}", address.0)?;
419                } else {
420                    // otherwise just write the slot address
421                    write!(output, "#{}", address.0)?;
422                }
423                handle_after_term(state, &mut output, false)?;
424            }
425            Instruction::DropSlot(address) => {
426                // if resolve_slots is enabled, write the slot as variable
427                if state.options.resolve_slots {
428                    // TODO #97: generate variable name for slot
429                    write!(output, "#drop {}", address.0)?;
430                } else {
431                    // otherwise just write the slot address
432                    write!(output, "#drop {}", address.0)?;
433                }
434            }
435            Instruction::SetSlot(address) => {
436                handle_before_term(state, &mut output, false)?;
437                state.new_scope(ScopeType::SlotAssignment);
438                // if resolve_slots is enabled, write the slot as variable
439                if state.options.resolve_slots {
440                    // TODO #98: generate variable name for slot
441                    write!(output, "#{} = ", address.0)?;
442                } else {
443                    // otherwise just write the slot address
444                    write!(output, "#{} = ", address.0)?;
445                }
446            }
447
448            Instruction::CreateRef => {
449                handle_before_term(state, &mut output, false)?;
450                state.get_current_scope().skip_comma_for_next_item = true;
451                write!(output, "$")?;
452            }
453
454            Instruction::RemoteExecution => {
455                handle_before_term(state, &mut output, false)?;
456                state.get_current_scope().active_operator = Some((instruction, true,));
457            }
458
459            Instruction::ExecutionBlock(data) => {
460                handle_before_term(state, &mut output, true)?;
461                // decompile data.body
462                let decompiled_body = decompile_body(
463                    &data.body,
464                    state.options.clone(),
465                )?;
466                let slot_mapping = data.injected_slots.iter().enumerate().map(|(k, v)| {
467                    format!(
468                        "#{v} => #{k}"
469                    )
470                }).collect::<Vec<_>>().join(", ");
471                // write the decompiled body
472                write!(output, "[{slot_mapping}]({decompiled_body})")?;
473            }
474            
475            _ => {
476                write!(output, "[[{instruction}]]")?;
477            }
478        }
479    }
480
481    // add syntax highlighting
482    if state.options.colorized {
483        output = apply_syntax_highlighting(output)?;
484    }
485
486    Ok(output)
487}
488
489pub fn apply_syntax_highlighting(
490    datex_script: String,
491) -> Result<String, DXBParserError> {
492    let mut output = String::new();
493
494    // load datex syntax + custom theme
495    static DATEX_SCRIPT_DEF: &str = include_str!(
496        "../../datex-language/datex.tmbundle/Syntaxes/datex.sublime-text"
497    );
498    static DATEX_THEME_DEF: &str =
499        include_str!("../../datex-language/themes/datex-dark.tmTheme");
500    let mut builder = SyntaxSetBuilder::new();
501    let syntax = SyntaxDefinition::load_from_str(DATEX_SCRIPT_DEF, true, None)
502        .expect("Failed to load syntax definition");
503    builder.add(syntax);
504    let theme: Theme =
505        ThemeSet::load_from_reader(&mut Cursor::new(DATEX_THEME_DEF))
506            .expect("Failed to load theme");
507
508    let ps = builder.build();
509    let syntax = ps.find_syntax_by_extension("dx").unwrap();
510    let mut h = HighlightLines::new(syntax, &theme);
511
512    for line in LinesWithEndings::from(&datex_script) {
513        let ranges: Vec<(Style, &str)> = h.highlight_line(line, &ps).unwrap();
514        let escaped = as_24_bit_terminal_escaped(&ranges[..], false);
515        write!(output, "{escaped}")?;
516    }
517    // reset style
518    write!(output, "\x1b[0m")?;
519    Ok(output)
520}
521
/// Escapes a text so that it can be written between double quotes.
///
/// Backslashes and double quotes are prefixed with a backslash; backspace,
/// form feed, carriage return, tab, vertical tab and line feed are replaced
/// by `\b`, `\f`, `\r`, `\t`, `\v` and `\n`. All other characters are copied
/// unchanged.
///
/// The text is processed in a single pass with a single output buffer.
fn escape_text(text: &str) -> String {
    // the escaped text is at least as long as the input
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '\\' => escaped.push_str(r"\\"),
            '"' => escaped.push_str(r#"\""#),
            '\u{0008}' => escaped.push_str(r"\b"),
            '\u{000c}' => escaped.push_str(r"\f"),
            '\r' => escaped.push_str(r"\r"),
            '\t' => escaped.push_str(r"\t"),
            '\u{000b}' => escaped.push_str(r"\v"),
            '\n' => escaped.push_str(r"\n"),
            other => escaped.push(other),
        }
    }
    escaped
}
533
534fn write_text_key(
535    state: &mut DecompilerState,
536    text: &str,
537    output: &mut String,
538    formatted: bool,
539) -> Result<(), DXBParserError> {
540    // if text does not just contain a-z, A-Z, 0-9, _, and starts with a-z, A-Z,  _, add quotes
541    let text = if !state.options.json_compat && is_alphanumeric_identifier(text)
542    {
543        text.to_string()
544    } else {
545        format!("\"{}\"", escape_text(text))
546    };
547    if formatted {
548        write!(output, "{text}: ")?;
549    } else {
550        write!(output, "{text}:")?;
551    }
552    Ok(())
553}
554
/// Returns true if `s` can be used as an unquoted key.
///
/// The first character must be an ASCII letter or `_`; every following
/// character must be an ASCII letter, an ASCII digit, `_` or `-`.
/// The empty string is not an identifier.
fn is_alphanumeric_identifier(s: &str) -> bool {
    let is_start = |c: char| c == '_' || c.is_ascii_alphabetic();
    let is_continuation =
        |c: char| matches!(c, '_' | '-') || c.is_ascii_alphanumeric();

    let mut characters = s.chars();
    characters.next().map_or(false, is_start)
        && characters.all(is_continuation)
}
567
568/// insert syntax before a term (e.g. operators, commas, etc.)
569/// if is_standalone_key is set to true, no parenthesis are wrapped around the item if it is a key,
570/// e.g. for text ("key": "value") the parenthesis are not needed
571fn handle_before_term(
572    state: &mut DecompilerState,
573    output: &mut String,
574    is_standalone_key: bool,
575) -> Result<(), DXBParserError> {
576    handle_before_operand(state, output)?;
577    handle_before_item(state, output, is_standalone_key)?;
578    Ok(())
579}
580
581/// if is_standalone_key is set to true, no parenthesis are wrapped around the item if it is a key,
582/// e.g. for text ("key": "value") the parenthesis are not needed
583fn handle_after_term(
584    state: &mut DecompilerState,
585    output: &mut String,
586    is_standalone_key: bool,
587) -> Result<(), DXBParserError> {
588    let close_scope = state.get_current_scope().close_scope_after_term;
589    if close_scope {
590        // close scope after term
591        state.close_scope();
592    }
593
594    // next_item_is_key
595    if state.get_current_scope().next_item_is_key {
596        if !is_standalone_key || close_scope {
597            write!(output, ")")?;
598        }
599        // set next_item_is_key to false
600        state.get_current_scope().next_item_is_key = false;
601        if state.options.formatted {
602            write!(output, ": ")?;
603        } else {
604            write!(output, ":")?;
605        }
606        // prevent redundant comma before value
607        state.get_current_scope().skip_comma_for_next_item = true;
608    }
609
610    Ok(())
611}
612
613/// before scope close (insert scope closing syntax)
614fn handle_scope_close(
615    state: &mut DecompilerState,
616    output: &mut String,
617) -> Result<(), DXBParserError> {
618    let scope = state.get_current_scope();
619    // close only if not outer scope
620    if !scope.is_outer_scope {
621        state.get_current_scope().write_end(output)?;
622    }
623    // close scope
624    state.close_scope();
625    Ok(())
626}
627
628/// insert comma syntax before a term (e.g. ",")
629/// if is_standalone_key is set to true, no parenthesis are wrapped around the item if it is a key,
630/// e.g. for text ("key": "value") the parenthesis are not needed
631fn handle_before_item(
632    state: &mut DecompilerState,
633    output: &mut String,
634    is_standalone_key: bool,
635) -> Result<(), DXBParserError> {
636    let formatted = state.options.formatted;
637    let scope = state.get_current_scope();
638
639    // if next_item_is_key, add opening parenthesis
640    if !is_standalone_key && scope.next_item_is_key {
641        write!(output, "(")?;
642    }
643
644    match scope.scope_type {
645        (_, true) => {
646            // if first is true, set to false
647            scope.scope_type.1 = false;
648        }
649        (ScopeType::Array | ScopeType::Object | ScopeType::Tuple, false)
650            if !scope.skip_comma_for_next_item =>
651        {
652            if formatted {
653                write!(output, ", ")?;
654            } else {
655                write!(output, ",")?;
656            }
657        }
658        _ => {
659            // don't insert comma for default scope
660        }
661    }
662
663    // reset skip_comma_for_next_item flag
664    scope.skip_comma_for_next_item = false;
665
666    Ok(())
667}
668
669/// insert operator syntax before an operand (e.g. +, -, etc.)
670fn handle_before_operand(
671    state: &mut DecompilerState,
672    output: &mut String,
673) -> Result<(), DXBParserError> {
674    if let Some(operator) = state.get_current_scope().active_operator.take() {
675        // handle the operator before the operand
676        match operator {
677            (_, true) => {
678                // if first is true, set to false
679                state.get_current_scope().active_operator =
680                    Some((operator.0.clone(), false));
681            }
682            (Instruction::Add, false) => {
683                write_operator(state, output, "+")?;
684                state.get_current_scope().close_scope_after_term = true;
685            }
686            (Instruction::Subtract, false) => {
687                write_operator(state, output, "-")?;
688                state.get_current_scope().close_scope_after_term = true;
689            }
690            (Instruction::Multiply, false) => {
691                write_operator(state, output, "*")?;
692                state.get_current_scope().close_scope_after_term = true;
693            }
694            (Instruction::Divide, false) => {
695                write_operator(state, output, "/")?;
696                state.get_current_scope().close_scope_after_term = true;
697            }
698            (Instruction::RemoteExecution, false) => {
699                write_operator(state, output, "::")?;
700                state.get_current_scope().close_scope_after_term = false;
701            }
702            _ => {
703                panic!("Invalid operator: {operator:?}");
704            }
705        }
706    }
707    Ok(())
708}
709
710fn write_operator(
711    state: &mut DecompilerState,
712    output: &mut String,
713    operator: &str,
714) -> Result<(), DXBParserError> {
715    write!(output, " {operator} ")?;
716    Ok(())
717}