datex_core/decompiler/
mod.rs

1mod constants;
2
3
4use std::borrow::Cow;
5use std::cell::Cell;
6use std::collections::HashMap;
7use std::collections::HashSet;
8use std::vec;
9
10use constants::tokens::get_code_token;
11use crate::datex_values::SlotIdentifier;
12use crate::parser::header::has_dxb_magic_number;
13use crate::utils::color::AnsiCodes;
14use crate::utils::color::Color;
15use crate::utils::logger::LoggerContext;
16use lazy_static::lazy_static;
17use regex::Regex;
18use crate::datex_values::Value;
19
20use crate::global::binary_codes::BinaryCode;
21use crate::parser::header;
22use crate::parser::body;
23
24use self::constants::tokens::get_code_color;
25
26lazy_static!{
27	static ref NEW_LINE:Regex = Regex::new(r"\r\n").unwrap();
28	static ref LAST_LINE:Regex = Regex::new(r"   (.)$").unwrap();
29	static ref INDENT:String ="\r\n   ".to_string();
30
31}
32
33
34/**
35 * Converts DXB (with or without header) to DATEX Script
36 */
37pub fn decompile(ctx: &LoggerContext, dxb:&[u8], formatted:bool, colorized:bool, resolve_slots:bool) -> String {
38
39	let mut body = dxb;
40
41	// header?
42	if has_dxb_magic_number(dxb) {
43		let (_header, _body) = header::parse_dxb_header(dxb);
44		body = _body;
45	}
46
47	return decompile_body(ctx, body, formatted, colorized, resolve_slots);
48}
49
50
51pub fn decompile_body(ctx: &LoggerContext, dxb_body:&[u8], formatted:bool, colorized:bool, resolve_slots:bool) -> String {
52	
53	let mut initial_state = DecompilerGlobalState {
54		ctx,
55		dxb_body,
56		index: &Cell::from(0),
57		is_end_instruction: &Cell::from(false),
58
59		formatted, 
60		colorized,
61		resolve_slots,
62
63		current_label: 0,
64		labels: HashMap::new(),
65		inserted_labels: HashSet::new(),
66		variables: HashMap::new(),
67	};
68
69	return decompile_loop(&mut initial_state);
70}
71
/// Renders `n` as a base-26 string using the digits `a` (0) to `z` (25),
/// most significant digit first.
///
/// Zero and negative inputs both yield `"a"`.
fn int_to_label(n: i32) -> String {
	// nothing to convert: the loop below would produce no digits
	if n <= 0 {
		return "a".to_string();
	}

	// collect digits least-significant first
	let mut digits: Vec<u8> = Vec::new();
	let mut rest = n;
	while rest > 0 {
		digits.push(b'a' + (rest % 26) as u8);
		rest /= 26;
	}

	// reverse into most-significant-first order
	digits.iter().rev().map(|&d| d as char).collect()
}
95
96
97struct DecompilerGlobalState<'a> {
98	// ctx
99	ctx: &'a LoggerContext,
100
101	// dxb
102	dxb_body:&'a [u8], 
103	index: &'a Cell<usize>,
104	is_end_instruction: &'a Cell<bool>,
105
106	// options
107	formatted: bool,
108	colorized: bool,
109	resolve_slots: bool, // display slots with generated variable names
110
111	// state
112	current_label: i32,
113	labels: HashMap<usize, String>,
114	inserted_labels: HashSet<usize>,
115	variables: HashMap<u16, String>
116}
117
118impl DecompilerGlobalState<'_> {
119	fn get_insert_label(&mut self, index:usize) -> String {
120		// existing
121		if self.labels.contains_key(&index) {
122			return self.labels.get(&index).or(Some(&"?invalid?".to_string())).unwrap().to_string();
123		}
124		// new
125		else {
126			let name = self.current_label.to_string();
127			self.current_label += 1;
128			self.labels.insert(index, name.clone());
129			return name;
130		}
131	}
132
133
134	// returns variable name and variable type if initialization
135	fn get_variable_name(&mut self, slot:&SlotIdentifier) -> (String, String) {
136		// return slot name
137		if slot.is_reserved() || slot.is_object_slot() || !self.resolve_slots {
138			return (slot.to_string(), "".to_string());
139		}
140		// existing variable
141		if self.variables.contains_key(&slot.index) {
142			return (self.variables.get(&slot.index).or(Some(&"?invalid?".to_string())).unwrap().to_string(), "".to_string())
143		}
144		// init variable
145		else {
146			let name = int_to_label(self.current_label);
147			self.current_label += 1;
148			self.variables.insert(slot.index, name.clone());
149			return (name, "var".to_string());
150		}
151	}
152}
153
154
155
156fn decompile_loop(state: &mut DecompilerGlobalState) -> String {
157	let mut out:String = "".to_string();
158
159	// let logger = Logger::new_for_development(&state.ctx, "Decompiler");
160
161	let instruction_iterator = body::iterate_instructions(state.dxb_body, state.index, state.is_end_instruction);
162
163	// flags - initial values
164	let mut open_element_comma = false;
165	let mut last_was_value = false;
166	let mut last_was_property_access = false;
167	let mut is_indexed_element = false;
168
169	let mut next_assign_action: Option<u8> = None;
170	let mut connective_size_stack: Vec<usize> = vec![];
171	let mut connective_type_stack:Vec<BinaryCode> = vec![];
172
173	for instruction in instruction_iterator {
174		
175		let code = instruction.code;
176
177		// is element instruction (in arrays, tuples, ..)
178		let is_new_element =  match code {
179			BinaryCode::ELEMENT => true,
180			BinaryCode::ELEMENT_WITH_KEY => true,
181			BinaryCode::ELEMENT_WITH_DYNAMIC_KEY => true,
182			BinaryCode::ELEMENT_WITH_INT_KEY => true,
183			BinaryCode::INTERNAL_OBJECT_SLOT => true,
184			_ => false
185		};
186
187		// closing array, object, ...
188		let is_closing = match code {
189			BinaryCode::CLOSE_AND_STORE => true,
190			BinaryCode::SUBSCOPE_END => true,
191			BinaryCode::ARRAY_END => true,
192			BinaryCode::OBJECT_END => true,
193			BinaryCode::TUPLE_END => true,
194			_ => false
195		};
196
197		// binary codes around which there is no space required
198		let no_space_around = match code {
199			BinaryCode::CLOSE_AND_STORE => true,
200			BinaryCode::CHILD_ACTION => true,
201			BinaryCode::CHILD_GET => true,
202			BinaryCode::CHILD_GET_REF => true,
203			BinaryCode::CHILD_SET => true,
204			BinaryCode::CHILD_SET_REFERENCE => true,
205			_ => false
206		};
207
208		let add_comma = open_element_comma && is_new_element; // comma still has to be closed, possible when the next code starts a new element
209
210		// space between
211		if state.formatted && last_was_value && !add_comma && !no_space_around && !is_indexed_element && !is_closing {
212			out += " ";
213		}
214		last_was_value = true;
215		is_indexed_element = false; // reset
216
217		// check flags:
218		// comma
219		if add_comma {
220			open_element_comma = false;
221			if state.colorized {out += &Color::DEFAULT.as_ansi_rgb();} // light grey color for property keys
222			out += if state.formatted {",\r\n"} else {","}
223		}
224
225		
226		let has_slot = instruction.slot.is_some();
227		let slot = instruction.slot.unwrap_or_default();
228
229		let has_primitive_value = instruction.primitive_value.is_some();
230		let primitive_value = instruction.primitive_value.unwrap_or_default();
231		let mut custom_primitive_color = false;
232
233		// slot to variable mapping
234		let variable_info = if has_slot { state.get_variable_name(&slot)} else {("".to_string(),"".to_string())};
235		let variable_name = variable_info.0;
236		let variable_prefix = variable_info.1;
237
238		// coloring
239		if state.colorized {
240			// handle property key strings
241			if last_was_property_access && (code == BinaryCode::TEXT || code == BinaryCode::SHORT_TEXT) && primitive_value.can_omit_quotes() {
242				out += &get_code_color(&BinaryCode::ELEMENT_WITH_KEY).as_ansi_rgb(); // light grey color for property keys
243			}
244			// normal coloring
245			else if code != BinaryCode::CLOSE_AND_STORE { // color is added later for CLOSE_AND_STORE
246				let color = get_code_color(&code);
247				if color == Color::_UNKNOWN && has_primitive_value {
248					custom_primitive_color = true;
249				}
250				else {
251					out += &color.as_ansi_rgb();
252				}
253			}
254		}
255
256
257		// token to string
258
259	
260		match code {
261			// slot based
262			BinaryCode::INTERNAL_VAR 			    => out += &format!("{variable_name}"),
263			// only for backwards compatibility
264			BinaryCode::LABEL 			    => out += &format!("$_{variable_name}"),
265			BinaryCode::SET_INTERNAL_VAR => {
266				if state.colorized {out += &Color::RESERVED.as_ansi_rgb();}
267				out += &variable_prefix;
268				if variable_prefix.len()!=0 {out += " "};
269				if state.colorized {out += &get_code_color(&code).as_ansi_rgb();}
270				out += &variable_name;
271				if state.colorized {out += &Color::DEFAULT.as_ansi_rgb();}
272				out += " = ";
273			},
274			BinaryCode::INIT_INTERNAL_VAR => {
275				if state.colorized {out += &Color::RESERVED.as_ansi_rgb();}
276				out += &variable_prefix;
277				if variable_prefix.len()!=0 {out += " "};
278				if state.colorized {out += &get_code_color(&code).as_ansi_rgb();}
279				out += &variable_name;
280				if state.colorized {out += &Color::DEFAULT.as_ansi_rgb();}
281				out += " := ";
282			},
283			BinaryCode::SET_INTERNAL_VAR_REFERENCE 	=> {
284				if state.colorized {out += &Color::RESERVED.as_ansi_rgb();}
285				out += &variable_prefix;
286				if variable_prefix.len()!=0 {out += " "};
287				if state.colorized {out += &get_code_color(&code).as_ansi_rgb();}
288				out += &variable_name;
289				if state.colorized {out += &Color::DEFAULT.as_ansi_rgb();}
290				out += " $= ";
291			},
292
293			// pointer
294			BinaryCode::INIT_POINTER => {
295				if state.colorized {out += &Color::RESERVED.as_ansi_rgb();}
296				out += &instruction.value.unwrap().to_string();
297				if state.colorized {out += &Color::DEFAULT.as_ansi_rgb();}
298				out += " := ";
299			},
300			BinaryCode::SET_POINTER => {
301				if state.colorized {out += &Color::RESERVED.as_ansi_rgb();}
302				out += &instruction.value.unwrap().to_string();
303				if state.colorized {out += &Color::DEFAULT.as_ansi_rgb();}
304				out += " =";
305			},
306
307			// assign actions (override primitive value default behaviour)
308			BinaryCode::CHILD_ACTION => out += &get_code_token(&BinaryCode::CHILD_ACTION, state.formatted),
309		
310			// special primitive value formatting
311			BinaryCode::ELEMENT_WITH_KEY            => out += &format!("{}:", primitive_value.to_key_string()),
312			BinaryCode::ELEMENT_WITH_INT_KEY        => out += &format!("{}:", primitive_value.to_key_string()),             
313			BinaryCode::INTERNAL_OBJECT_SLOT        => out += &format!("{}:", SlotIdentifier::new(primitive_value.get_as_unsigned_integer() as u16)),        
314
315			// resolve relativ path, path is stored in text primitive
316			BinaryCode::RESOLVE_RELATIVE_PATH        => out += primitive_value.get_as_text(),
317
318			// indexed element without key
319			BinaryCode::ELEMENT	=> {
320				is_indexed_element = true; // don't add whitespace in front of next value for correct indentation
321			},
322
323			// logical connectives
324			BinaryCode::CONJUNCTION	=> {
325				out += "(";
326				connective_type_stack.push(BinaryCode::CONJUNCTION);
327				connective_size_stack.push(primitive_value.get_as_unsigned_integer());
328			},
329			BinaryCode::DISJUNCTION	=> {
330				out += "(";
331				connective_type_stack.push(BinaryCode::DISJUNCTION);
332				connective_size_stack.push(primitive_value.get_as_unsigned_integer());
333			},
334
335			// jmp
336			BinaryCode::JMP	=> {
337				let label = state.get_insert_label(primitive_value.get_as_unsigned_integer());
338				out += &format!("jmp {}", label);
339				if state.colorized {out += &Color::DEFAULT.as_ansi_rgb();}
340				out += ";";
341			},
342			BinaryCode::JTR	=> {
343				let label = state.get_insert_label(primitive_value.get_as_unsigned_integer());
344				out += &format!("jtr {}", label)
345			},
346			BinaryCode::JFA	=> {
347				let label = state.get_insert_label(primitive_value.get_as_unsigned_integer());
348				out += &format!("jfa {}", label)
349			},
350
351			// scope
352			BinaryCode::SCOPE_BLOCK_START => {
353				let scope = &mut decompile_body(&state.ctx, &primitive_value.get_as_buffer(), state.formatted, state.colorized, state.resolve_slots);
354				
355				// multi line scope TODO: check multiline (problem cannot check scope.contains(";"), because escape codes can contain ";")
356				if true {
357					*scope += ")";
358					out += "(";
359					// ----------
360					if state.formatted {
361						out += &INDENT;
362						out += &NEW_LINE.replace_all(   // add spaces to every new line
363							&scope, 
364							&INDENT.to_string()
365						);
366					};
367					// ----------
368				}
369				else {
370					scope.pop(); // remove last character (;)
371					scope.pop();
372					scope.pop();
373					out += scope;
374				}
375			},
376
377			BinaryCode::CLOSE_AND_STORE => {
378				// newline+spaces before, remove, add ';' and add newline afterwards
379				let empty: &[_] = &['\r', '\n', ' '];
380				out = out.trim_end_matches(empty).to_string();
381				if state.colorized {out += &get_code_color(&code).as_ansi_rgb()}
382				out += &get_code_token(&code, state.formatted);
383				// newline if not end of file
384				if state.formatted && state.index.get() < state.dxb_body.len() {
385					out += "\r\n";
386				}
387			}
388
389			_ => {
390				// primitive value default
391				if has_primitive_value {
392					if last_was_property_access {out += &primitive_value.to_key_string()}
393					else if custom_primitive_color {out += &primitive_value.to_string_colorized()}
394					else {out += &Value::to_string(&primitive_value)}
395				}
396				// complex value
397				else if instruction.value.is_some() {
398					out += &instruction.value.unwrap().to_string();
399				}
400				// fallback if no string representation possible [hex code]
401				else {
402					out += &get_code_token(&code, state.formatted)
403				}
404			}
405		}
406
407
408		// enter new subscope - continue at index?
409		if instruction.subscope_continue {
410			let inner = Cow::from(decompile_loop(state));
411			let is_empty = inner.len() == 0;
412			let newline_count = inner.chars().filter(|c| *c == '\n').count();
413
414			// only if content inside brackets, and multiple lines
415			if state.formatted && !is_empty && newline_count>0 {
416				out += &INDENT;
417				out += &NEW_LINE.replace_all(   // add spaces to every new line
418					&inner, 
419					&INDENT.to_string()
420				);
421				out += "\r\n";
422			}
423
424			// no content inside brackets or single line
425			else {
426				out += &NEW_LINE.replace_all(&inner, "").trim_end(); // remove remaining new line + spaces in last line
427			}
428		}
429
430
431		// after value insert : finish assign action?
432		if next_assign_action.is_some() {
433			// coloring
434			if state.colorized {
435				out += &Color::DEFAULT.as_ansi_rgb();
436			}
437			// +=, -=, ...
438			out += " ";
439			let assign_type = next_assign_action.unwrap();
440
441			match assign_type {
442				1 => out += "$",
443				2 => out += "",
444				_ => out += &get_code_token(&BinaryCode::try_from(assign_type).expect("enum conversion error"), false)
445			}
446			out += "= ";
447			last_was_value = false; // no additional space afterwards
448			next_assign_action = None; // reset
449		}
450
451		// check for new assign actions
452		match code {
453			BinaryCode::CHILD_ACTION => next_assign_action = Some(primitive_value.get_as_integer() as u8),
454			BinaryCode::CHILD_SET_REFERENCE => next_assign_action = Some(1),
455			BinaryCode::CHILD_SET => next_assign_action = Some(2),
456			_ => ()
457		}
458
459
460		// reset flags
461		last_was_property_access = false;
462
463		// set flags
464		if is_new_element {open_element_comma = true} // remember to add comma after element
465
466		// ) ] } end
467		if is_closing {
468			// open_element_comma = false; // no more commas required 
469			last_was_value = false; // no space afterwards
470		} 
471
472		if no_space_around {
473			last_was_value = false // no space afterwards
474		}
475
476		match code {
477			BinaryCode::SET_INTERNAL_VAR => {last_was_value = false}, // no space afterwards
478			BinaryCode::SET_INTERNAL_VAR_REFERENCE => {last_was_value = false}, // no space afterwards
479			BinaryCode::INIT_INTERNAL_VAR => {last_was_value = false}, // no space afterwards
480			BinaryCode::INIT_POINTER => {last_was_value = false}, // no space afterwards
481			BinaryCode::NOT => {last_was_value = false}, // no space afterwards
482			BinaryCode::CHILD_GET => {last_was_property_access = true}, // enable property key formatting for next
483			BinaryCode::CHILD_GET_REF => {last_was_property_access = true}, // enable property key formatting for next
484			BinaryCode::CHILD_ACTION => {last_was_property_access = true}, // enable property key formatting for next
485			BinaryCode::CHILD_SET => {last_was_property_access = true}, // enable property key formatting for next
486			BinaryCode::CHILD_SET_REFERENCE => {last_was_property_access = true}, // enable property key formatting for next
487			
488			BinaryCode::CONJUNCTION => {last_was_value = false}, // no space afterwards
489			BinaryCode::DISJUNCTION => {last_was_value = false}, // no space afterwards
490
491			_ => ()
492		}
493		
494
495
496		// insert label
497		for label in &mut state.labels {
498			// only add if at right index and not yet inserted
499			if *label.0 == state.index.get() && !state.inserted_labels.contains(label.0) {
500				if state.colorized {out += &Color::RESERVED.as_ansi_rgb();}
501				out += "\r\nlbl ";
502				out += &label.1;
503				if state.colorized {out += &Color::DEFAULT.as_ansi_rgb();}
504				out += ";";
505				state.inserted_labels.insert(*label.0);
506			}
507		}
508
509		// TODO: improve this, last_was_value and stack behaviour is not correct all the time.
510		// This tries to reconstruct the runtime behaviour of inserting values to the stack, which fails e.g for function calls and many other usecases that are not
511		// handled in the decompiler - only permanent 100% fix would be to evaluate the conjunction/disjunction in the runtime and stringify the resulting value, but this
512		// is a big overhead for the decompiler and also might create unintended sideffects...
513
514		// update connective_size and add &/| syntax
515		while last_was_value && connective_size_stack.len()!=0 {
516			let len = connective_size_stack.len()-1;
517			connective_size_stack[len] -= 1;
518
519			if state.colorized  {out += &Color::DEFAULT.as_ansi_rgb()};
520
521			// connective_size_stack finished
522			if connective_size_stack[len] == 0 {
523				connective_size_stack.pop();
524				connective_type_stack.pop();
525				out += ")";
526				// possible new loop iteration for next element in stack
527			}
528			// add new connective element
529			else {
530				out += if connective_type_stack[connective_type_stack.len()-1] == BinaryCode::CONJUNCTION {" &"} else {" |"};
531				break; // no further iteration, still in same stack
532			}
533		}
534	
535	
536	}
537
538	if state.colorized {out += AnsiCodes::RESET};
539
540	return out;
541}