Skip to main content

gcrecomp_core/recompiler/
ghidra.rs

//! ReOxide Integration and Ghidra Headless Analysis
//!
//! This module integrates with Ghidra for reverse-engineering analysis.
//! It supports two backends:
//! - **ReOxide**: Python-based tool that enhances Ghidra's decompilation capabilities
//! - **HeadlessCli**: Direct Ghidra headless CLI integration
//!
//! # Auto-Installation
//! If ReOxide is not present, the module attempts to install it automatically
//! via pipx (or pip as a fallback), so no manual setup is normally required.
11
12use anyhow::{Context, Result};
13use std::path::{Path, PathBuf};
14use std::process::Command;
15use serde::{Deserialize, Serialize};
16use std::collections::HashMap;
17
18pub struct GhidraAnalysis {
19    pub functions: Vec<FunctionInfo>,
20    pub symbols: Vec<SymbolInfo>,
21    pub decompiled_code: HashMap<u32, DecompiledFunction>,
22    pub instructions: HashMap<u32, Vec<InstructionData>>,
23}
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct FunctionInfo {
27    pub address: u32,
28    pub name: String,
29    pub size: u32,
30    pub calling_convention: String,
31    pub parameters: Vec<ParameterInfo>,
32    pub return_type: Option<String>,
33    pub local_variables: Vec<LocalVariableInfo>,
34    pub basic_blocks: Vec<BasicBlockInfo>,
35}
36
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct ParameterInfo {
39    pub name: String,
40    #[serde(rename = "type")]
41    pub param_type: String,
42    pub offset: Option<i32>,
43}
44
45#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct LocalVariableInfo {
47    pub name: String,
48    #[serde(rename = "type")]
49    pub var_type: String,
50    pub offset: i32,
51    pub address: String,
52}
53
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct BasicBlockInfo {
56    pub address: String,
57    pub size: u32,
58    pub instructions: Vec<String>,
59}
60
61#[derive(Debug, Clone, Serialize, Deserialize)]
62pub struct SymbolInfo {
63    pub address: u32,
64    pub name: String,
65    pub symbol_type: SymbolType,
66    pub namespace: Option<String>,
67}
68
69#[derive(Debug, Clone, Serialize, Deserialize)]
70pub enum SymbolType {
71    Function,
72    Data,
73    Label,
74    Unknown,
75}
76
77#[derive(Debug, Clone, Serialize, Deserialize)]
78pub struct DecompiledFunction {
79    pub c_code: String,
80    pub high_function: String,
81}
82
83#[derive(Debug, Clone, Serialize, Deserialize)]
84pub struct InstructionData {
85    pub address: u32,
86    pub mnemonic: String,
87    pub operands: Vec<String>,
88    pub raw_bytes: Vec<u8>,
89}
90
/// Selects how [`GhidraAnalysis::analyze`] drives Ghidra.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GhidraBackend {
    /// Run the ReOxide-enhanced pipeline, falling back to `HeadlessCli` if it
    /// returns an error.
    ReOxide,
    /// Run Ghidra's `analyzeHeadless` directly.
    HeadlessCli,
}
95
96impl GhidraAnalysis {
97    /// Analyze a DOL file using Ghidra.
98    ///
99    /// # Backend Selection
100    /// - **ReOxide**: Automatically installs and uses ReOxide if available
101    /// - **HeadlessCli**: Falls back to direct Ghidra headless CLI
102    ///
103    /// # Arguments
104    /// * `dol_path` - Path to DOL file
105    /// * `backend` - Backend to use (ReOxide will auto-install if needed)
106    ///
107    /// # Returns
108    /// `Result<GhidraAnalysis>` - Analysis results
109    #[inline] // May be called frequently
110    pub fn analyze(dol_path: &str, backend: GhidraBackend) -> Result<Self> {
111        match backend {
112            GhidraBackend::ReOxide => {
113                // Try ReOxide first, fallback to HeadlessCli if it fails
114                Self::analyze_reoxide(dol_path)
115                    .or_else(|e| {
116                        log::warn!("ReOxide analysis failed: {}. Falling back to HeadlessCli.", e);
117                        Self::analyze_headless(dol_path)
118                    })
119            }
120            GhidraBackend::HeadlessCli => Self::analyze_headless(dol_path),
121        }
122    }
123
124    /// Analyze using ReOxide (Python tool for enhanced Ghidra integration).
125    ///
126    /// # Algorithm
127    /// 1. Check if ReOxide is installed, install if missing
128    /// 2. Initialize ReOxide configuration if needed
129    /// 3. Install Ghidra scripts if needed
130    /// 4. Use ReOxide to enhance Ghidra analysis
131    /// 5. Parse enhanced analysis results
132    ///
133    /// # Arguments
134    /// * `dol_path` - Path to DOL file
135    ///
136    /// # Returns
137    /// `Result<GhidraAnalysis>` - Enhanced analysis results
138    #[inline(never)] // Large function - don't inline
139    fn analyze_reoxide(dol_path: &str) -> Result<Self> {
140        log::info!("Using ReOxide backend for enhanced Ghidra analysis...");
141        
142        // Step 1: Ensure ReOxide is installed
143        Self::ensure_reoxide_installed()?;
144        
145        // Step 2: Ensure ReOxide is configured
146        Self::ensure_reoxide_configured()?;
147        
148        // Step 3: Ensure Ghidra scripts are installed
149        Self::ensure_ghidra_scripts_installed()?;
150        
151        // Step 4: Use ReOxide-enhanced Ghidra analysis
152        // ReOxide works with Ghidra, so we still use analyzeHeadless but with ReOxide scripts
153        let dol_path = Path::new(dol_path);
154        let project_name = dol_path
155            .file_stem()
156            .and_then(|s| s.to_str())
157            .context("Invalid DOL path")?;
158
159        // Create a temporary project directory
160        let project_dir = std::env::temp_dir().join(format!("gcrecomp_reoxide_{}", project_name));
161        std::fs::create_dir_all(&project_dir)?;
162
163        // Export directory for Ghidra script output
164        let export_dir = project_dir.join("export");
165        std::fs::create_dir_all(&export_dir)?;
166        std::env::set_var("GHIDRA_EXPORT_DIR", &export_dir);
167
168        // Find Ghidra installation
169        let ghidra_path = find_ghidra()?;
170        let analyze_headless = ghidra_path.join("support").join("analyzeHeadless");
171        
172        // Use ReOxide-enhanced export script
173        let script_path = find_or_create_reoxide_export_script(&ghidra_path)?;
174
175        // Step 1: Import and analyze with ReOxide enhancements
176        log::info!("Importing DOL file into Ghidra with ReOxide...");
177        let import_output = Command::new(&analyze_headless)
178            .arg(&project_dir)
179            .arg(project_name)
180            .arg("-import")
181            .arg(dol_path)
182            .arg("-processor")
183            .arg("PowerPC:BE:32:default")
184            .arg("-analysis")
185            .output()
186            .context("Failed to run Ghidra import with ReOxide")?;
187
188        if !import_output.status.success() {
189            let stderr = String::from_utf8_lossy(&import_output.stderr);
190            log::warn!("Ghidra import warnings: {}", stderr);
191        }
192
193        // Step 2: Run ReOxide-enhanced export script
194        log::info!("Running ReOxide-enhanced export script...");
195        let script_dir = script_path.parent()
196            .context("Script path has no parent directory")?;
197        let script_name = script_path.file_name()
198            .and_then(|n| n.to_str())
199            .context("Invalid script filename")?;
200        
201        let script_output = Command::new(&analyze_headless)
202            .arg(&project_dir)
203            .arg(project_name)
204            .arg("-process")
205            .arg("-scriptPath")
206            .arg(script_dir)
207            .arg("-script")
208            .arg(script_name)
209            .arg("-deleteProject")
210            .output()
211            .context("Failed to run ReOxide export script")?;
212
213        if !script_output.status.success() {
214            let stderr = String::from_utf8_lossy(&script_output.stderr);
215            log::warn!("ReOxide script warnings: {}", stderr);
216        }
217
218        // Step 3: Parse exported data (same as headless)
219        log::info!("Parsing ReOxide-enhanced exported data...");
220        let functions = parse_functions_json(&export_dir)?;
221        let symbols = parse_symbols_json(&export_dir)?;
222        let decompiled_code = parse_decompiled_json(&export_dir)?;
223        let instructions = extract_instructions(&project_dir, project_name)?;
224
225        Ok(Self {
226            functions,
227            symbols,
228            decompiled_code,
229            instructions,
230        })
231    }
232
233    /// Ensure ReOxide is installed, installing it if necessary.
234    ///
235    /// # Algorithm
236    /// 1. Check if `reoxide` command is available
237    /// 2. If not, try to install via pipx (preferred) or pip
238    /// 3. Verify installation succeeded
239    ///
240    /// # Returns
241    /// `Result<()>` - Success if ReOxide is available
242    #[inline] // May be called frequently
243    fn ensure_reoxide_installed() -> Result<()> {
244        // Check if reoxide is already available
245        if Command::new("reoxide")
246            .arg("--version")
247            .output()
248            .is_ok() {
249            log::info!("ReOxide is already installed");
250            return Ok(());
251        }
252
253        log::info!("ReOxide not found. Installing ReOxide...");
254        
255        // Try pipx first (preferred for CLI tools)
256        let install_result = if Command::new("pipx")
257            .arg("--version")
258            .output()
259            .is_ok() {
260            log::info!("Installing ReOxide via pipx...");
261            Command::new("pipx")
262                .arg("install")
263                .arg("reoxide")
264                .output()
265        } else {
266            // Fallback to pip
267            log::info!("Installing ReOxide via pip...");
268            Command::new("pip")
269                .arg("install")
270                .arg("--user")
271                .arg("reoxide")
272                .output()
273        };
274
275        match install_result {
276            Ok(output) if output.status.success() => {
277                log::info!("ReOxide installed successfully");
278                Ok(())
279            }
280            Ok(output) => {
281                let stderr = String::from_utf8_lossy(&output.stderr);
282                anyhow::bail!("Failed to install ReOxide: {}", stderr);
283            }
284            Err(e) => {
285                anyhow::bail!("Failed to run pip/pipx to install ReOxide: {}", e);
286            }
287        }
288    }
289
290    /// Ensure ReOxide is configured with Ghidra.
291    ///
292    /// # Algorithm
293    /// Runs `reoxide init-config` if configuration doesn't exist.
294    ///
295    /// # Returns
296    /// `Result<()>` - Success if ReOxide is configured
297    #[inline] // May be called frequently
298    fn ensure_reoxide_configured() -> Result<()> {
299        // Check if ReOxide config exists (it creates a config file)
300        // For now, we'll just try to run init-config and ignore if it already exists
301        let config_result = Command::new("reoxide")
302            .arg("init-config")
303            .output();
304
305        match config_result {
306            Ok(output) if output.status.success() => {
307                log::info!("ReOxide configuration initialized");
308                Ok(())
309            }
310            Ok(_) => {
311                // Config might already exist, which is fine
312                log::debug!("ReOxide configuration already exists or init skipped");
313                Ok(())
314            }
315            Err(e) => {
316                log::warn!("Could not initialize ReOxide config: {}. Continuing anyway.", e);
317                Ok(()) // Non-fatal, continue
318            }
319        }
320    }
321
322    /// Ensure ReOxide Ghidra scripts are installed.
323    ///
324    /// # Algorithm
325    /// Runs `reoxide install-ghidra-scripts` to install scripts into Ghidra.
326    ///
327    /// # Returns
328    /// `Result<()>` - Success if scripts are installed
329    #[inline] // May be called frequently
330    fn ensure_ghidra_scripts_installed() -> Result<()> {
331        log::info!("Installing ReOxide Ghidra scripts...");
332        
333        let script_result = Command::new("reoxide")
334            .arg("install-ghidra-scripts")
335            .output()
336            .context("Failed to run reoxide install-ghidra-scripts")?;
337
338        if script_result.status.success() {
339            log::info!("ReOxide Ghidra scripts installed successfully");
340            Ok(())
341        } else {
342            let stderr = String::from_utf8_lossy(&script_result.stderr);
343            log::warn!("ReOxide script installation had warnings: {}", stderr);
344            // Non-fatal, continue anyway
345            Ok(())
346        }
347    }
348
349    fn analyze_headless(dol_path: &str) -> Result<Self> {
350        let dol_path = Path::new(dol_path);
351        let project_name = dol_path
352            .file_stem()
353            .and_then(|s| s.to_str())
354            .context("Invalid DOL path")?;
355
356        // Create a temporary project directory
357        let project_dir = std::env::temp_dir().join(format!("gcrecomp_{}", project_name));
358        std::fs::create_dir_all(&project_dir)?;
359
360        // Export directory for Ghidra script output
361        let export_dir = project_dir.join("export");
362        std::fs::create_dir_all(&export_dir)?;
363        std::env::set_var("GHIDRA_EXPORT_DIR", &export_dir);
364
365        // Find Ghidra installation
366        let ghidra_path = find_ghidra()?;
367        let analyze_headless = ghidra_path.join("support").join("analyzeHeadless");
368        let script_path = find_or_create_export_script(&ghidra_path)?;
369
370        // Step 1: Import and analyze
371        log::info!("Importing DOL file into Ghidra...");
372        let import_output = Command::new(&analyze_headless)
373            .arg(&project_dir)
374            .arg(project_name)
375            .arg("-import")
376            .arg(dol_path)
377            .arg("-processor")
378            .arg("PowerPC:BE:32:default")
379            .arg("-analysis")
380            .output()
381            .context("Failed to run Ghidra import")?;
382
383        if !import_output.status.success() {
384            let stderr = String::from_utf8_lossy(&import_output.stderr);
385            log::warn!("Ghidra import warnings: {}", stderr);
386        }
387
388        // Step 2: Run export script
389        log::info!("Running Ghidra export script...");
390        let script_dir = script_path.parent()
391            .context("Script path has no parent directory")?;
392        let script_name = script_path.file_name()
393            .and_then(|n| n.to_str())
394            .context("Invalid script filename")?;
395        
396        let script_output = Command::new(&analyze_headless)
397            .arg(&project_dir)
398            .arg(project_name)
399            .arg("-process")
400            .arg("-scriptPath")
401            .arg(script_dir)
402            .arg("-script")
403            .arg(script_name)
404            .arg("-deleteProject")
405            .output()
406            .context("Failed to run Ghidra export script")?;
407
408        if !script_output.status.success() {
409            let stderr = String::from_utf8_lossy(&script_output.stderr);
410            log::warn!("Ghidra script warnings: {}", stderr);
411        }
412
413        // Step 3: Parse exported data
414        log::info!("Parsing exported data...");
415        let functions = parse_functions_json(&export_dir)?;
416        let symbols = parse_symbols_json(&export_dir)?;
417        let decompiled_code = parse_decompiled_json(&export_dir)?;
418        let instructions = extract_instructions(&project_dir, project_name)?;
419
420        // Note: Cleanup is handled by -deleteProject flag in script execution
421
422        Ok(Self {
423            functions,
424            symbols,
425            decompiled_code,
426            instructions,
427        })
428    }
429
430    pub fn get_function_at_address(&self, address: u32) -> Option<&FunctionInfo> {
431        self.functions
432            .iter()
433            .find(|f| f.address <= address && address < f.address + f.size)
434    }
435}
436
437fn find_ghidra() -> Result<std::path::PathBuf> {
438    // Check common Ghidra installation locations
439    let common_paths: Vec<std::path::PathBuf> = vec![
440        "/usr/local/ghidra".into(),
441        "/opt/ghidra".into(),
442        "/Applications/ghidra".into(),
443    ];
444
445    // Also check environment variable
446    let env_path = std::env::var("GHIDRA_INSTALL_DIR").ok().map(std::path::PathBuf::from);
447
448    let all_paths = common_paths.into_iter().chain(env_path);
449
450    for path in all_paths {
451        let ghidra_path = Path::new(&path);
452        if ghidra_path.join("support").join("analyzeHeadless").exists() {
453            return Ok(ghidra_path.to_path_buf());
454        }
455    }
456
457    anyhow::bail!(
458        "Ghidra not found. Please set GHIDRA_INSTALL_DIR environment variable or install Ghidra in a standard location."
459    );
460}
461
462fn find_or_create_export_script(ghidra_path: &Path) -> Result<PathBuf> {
463    // Check if script exists in scripts directory
464    let script_path = PathBuf::from("scripts/ghidra_export.py");
465    if script_path.exists() {
466        return Ok(script_path);
467    }
468
469    // Try to find it in Ghidra scripts directory
470    let ghidra_scripts = ghidra_path.join("Ghidra").join("Features").join("Python").join("ghidra_scripts");
471    if ghidra_scripts.exists() {
472        let script = ghidra_scripts.join("ghidra_export.py");
473        if script.exists() {
474            return Ok(script);
475        }
476    }
477
478    // Create the script if it doesn't exist
479    let script_content = include_str!("../../scripts/ghidra_export.py");
480    std::fs::write(&script_path, script_content)
481        .context("Failed to create Ghidra export script")?;
482    
483    Ok(script_path)
484}
485
486/// Find or create ReOxide-enhanced export script.
487///
488/// # Algorithm
489/// 1. First, try to find ReOxide scripts in the user's ghidra_scripts directory
490/// 2. Fallback to standard export script if ReOxide scripts not found
491///
492/// # Returns
493/// `Result<PathBuf>` - Path to ReOxide export script or fallback to standard script
494#[inline] // May be called frequently
495fn find_or_create_reoxide_export_script(ghidra_path: &Path) -> Result<PathBuf> {
496    // First, try to find ReOxide scripts in the user's ghidra_scripts directory
497    let home_dir = std::env::var("HOME")
498        .or_else(|_| std::env::var("USERPROFILE"))
499        .ok();
500    
501    if let Some(home) = home_dir {
502        let reoxide_script = PathBuf::from(&home)
503            .join("ghidra_scripts")
504            .join("reoxide_export.py");
505        if reoxide_script.exists() {
506            log::info!("Found ReOxide export script at: {}", reoxide_script.display());
507            return Ok(reoxide_script);
508        }
509    }
510
511    // Fallback to our standard export script
512    log::debug!("ReOxide export script not found, using standard export script");
513    find_or_create_export_script(ghidra_path)
514}
515
516fn parse_functions_json(export_dir: &Path) -> Result<Vec<FunctionInfo>> {
517    let json_path = export_dir.join("functions.json");
518    if !json_path.exists() {
519        log::warn!("functions.json not found, returning empty vector");
520        return Ok(vec![]);
521    }
522
523    let content = std::fs::read_to_string(&json_path)
524        .context("Failed to read functions.json")?;
525    
526    let raw_functions: Vec<serde_json::Value> = serde_json::from_str(&content)
527        .context("Failed to parse functions.json")?;
528
529    let mut functions = Vec::new();
530    for func in raw_functions {
531        let address_str = func["address"].as_str()
532            .context("Missing address in function")?;
533        let address = parse_address(address_str)?;
534
535        let parameters: Vec<ParameterInfo> = func["parameters"]
536            .as_array()
537            .unwrap_or(&vec![])
538            .iter()
539            .map(|p| ParameterInfo {
540                name: p["name"].as_str().unwrap_or("").to_string(),
541                param_type: p["type"].as_str().unwrap_or("u32").to_string(),
542                offset: p["offset"].as_i64().map(|o| o as i32),
543            })
544            .collect();
545
546        let local_vars: Vec<LocalVariableInfo> = func["local_variables"]
547            .as_array()
548            .unwrap_or(&vec![])
549            .iter()
550            .map(|v| LocalVariableInfo {
551                name: v["name"].as_str().unwrap_or("").to_string(),
552                var_type: v["type"].as_str().unwrap_or("u32").to_string(),
553                offset: v["offset"].as_i64().unwrap_or(0) as i32,
554                address: v["address"].as_str().unwrap_or("").to_string(),
555            })
556            .collect();
557
558        let basic_blocks: Vec<BasicBlockInfo> = func["basic_blocks"]
559            .as_array()
560            .unwrap_or(&vec![])
561            .iter()
562            .map(|b| BasicBlockInfo {
563                address: b["address"].as_str().unwrap_or("").to_string(),
564                size: b["size"].as_u64().unwrap_or(0) as u32,
565                instructions: b["instructions"]
566                    .as_array()
567                    .unwrap_or(&vec![])
568                    .iter()
569                    .map(|i| i.as_str().unwrap_or("").to_string())
570                    .collect(),
571            })
572            .collect();
573
574        functions.push(FunctionInfo {
575            address,
576            name: func["name"].as_str().unwrap_or("unknown").to_string(),
577            size: func["size"].as_u64().unwrap_or(0) as u32,
578            calling_convention: func["calling_convention"].as_str().unwrap_or("default").to_string(),
579            parameters,
580            return_type: func["return_type"].as_str().map(|s| s.to_string()),
581            local_variables: local_vars,
582            basic_blocks,
583        });
584    }
585
586    Ok(functions)
587}
588
589fn parse_symbols_json(export_dir: &Path) -> Result<Vec<SymbolInfo>> {
590    let json_path = export_dir.join("symbols.json");
591    if !json_path.exists() {
592        log::warn!("symbols.json not found, returning empty vector");
593        return Ok(vec![]);
594    }
595
596    let content = std::fs::read_to_string(&json_path)
597        .context("Failed to read symbols.json")?;
598    
599    let raw_symbols: Vec<serde_json::Value> = serde_json::from_str(&content)
600        .context("Failed to parse symbols.json")?;
601
602    let mut symbols = Vec::new();
603    for sym in raw_symbols {
604        let address_str = sym["address"].as_str()
605            .context("Missing address in symbol")?;
606        let address = parse_address(address_str)?;
607
608        let symbol_type = match sym["type"].as_str().unwrap_or("Unknown") {
609            "Function" => SymbolType::Function,
610            "Data" => SymbolType::Data,
611            "Label" => SymbolType::Label,
612            _ => SymbolType::Unknown,
613        };
614
615        symbols.push(SymbolInfo {
616            address,
617            name: sym["name"].as_str().unwrap_or("unknown").to_string(),
618            symbol_type,
619            namespace: sym["namespace"].as_str().map(|s| s.to_string()),
620        });
621    }
622
623    Ok(symbols)
624}
625
626fn parse_decompiled_json(export_dir: &Path) -> Result<HashMap<u32, DecompiledFunction>> {
627    let json_path = export_dir.join("decompiled.json");
628    if !json_path.exists() {
629        log::warn!("decompiled.json not found, returning empty map");
630        return Ok(HashMap::new());
631    }
632
633    let content = std::fs::read_to_string(&json_path)
634        .context("Failed to read decompiled.json")?;
635    
636    let raw_decompiled: HashMap<String, serde_json::Value> = serde_json::from_str(&content)
637        .context("Failed to parse decompiled.json")?;
638
639    let mut decompiled = HashMap::new();
640    for (addr_str, func_data) in raw_decompiled {
641        let address = parse_address(&addr_str)?;
642        decompiled.insert(address, DecompiledFunction {
643            c_code: func_data["c_code"].as_str().unwrap_or("").to_string(),
644            high_function: func_data["high_function"].as_str().unwrap_or("").to_string(),
645        });
646    }
647
648    Ok(decompiled)
649}
650
651fn extract_instructions(_project_dir: &Path, _project_name: &str) -> Result<HashMap<u32, Vec<InstructionData>>> {
652    // TODO: Extract instruction-level data from Ghidra
653    // This would require parsing the listing or using a script
654    Ok(HashMap::new())
655}
656
657fn parse_address(addr_str: &str) -> Result<u32> {
658    // Handle formats like "0x80000000" or "80000000"
659    let cleaned = addr_str.trim_start_matches("0x").trim_start_matches("0X");
660    u32::from_str_radix(cleaned, 16)
661        .or_else(|_| cleaned.parse::<u32>())
662        .context(format!("Failed to parse address: {}", addr_str))
663}
664