ghostscope_dwarf/core/
evaluation.rs

//! DWARF expression evaluation results for LLVM/eBPF code generation
//!
//! This module defines a simplified representation of DWARF expressions
//! that can be converted directly to LLVM IR for eBPF code generation.
//!
//! Design principles:
//! 1. Optimize for eBPF constraints (registers are read from pt_regs, memory via bpf_probe_read_user)
//! 2. Pre-compute as much as possible at compile time
//! 3. Clearly separate value semantics from location semantics
//! 4. Make register dependencies explicit for eBPF verification
11
12use std::fmt;
13
14/// Result of evaluating a DWARF expression for eBPF code generation
15#[derive(Debug, Clone, PartialEq)]
16pub enum EvaluationResult {
17    /// Direct value - expression result is the variable value (no memory read needed)
18    DirectValue(DirectValueResult),
19
20    /// Memory location - expression result is an address that needs to be dereferenced
21    MemoryLocation(LocationResult),
22
23    /// Variable is optimized out (no location/value available)
24    Optimized,
25
26    /// Composite location (multiple pieces) - for split variables
27    Composite(Vec<PieceResult>),
28}
29
30/// Direct value results - expression produces the variable value directly
31#[derive(Debug, Clone, PartialEq)]
32pub enum DirectValueResult {
33    /// Literal constant from DWARF expression (DW_OP_lit*, DW_OP_const*)
34    Constant(i64),
35
36    /// Implicit value embedded in DWARF (DW_OP_implicit_value)
37    ImplicitValue(Vec<u8>),
38
39    /// Register contains the variable value directly (DW_OP_reg*)
40    RegisterValue(u16),
41
42    /// Computed value from expression (DW_OP_stack_value)
43    /// This is a full expression that computes the value
44    ComputedValue {
45        /// Expression steps (stack-based computation)
46        steps: Vec<ComputeStep>,
47        /// Expected result type size
48        result_size: MemoryAccessSize,
49    },
50}
51
52/// Memory location results - expression produces an address to be read via bpf_probe_read_user
53#[derive(Debug, Clone, PartialEq)]
54pub enum LocationResult {
55    /// Absolute memory address (DW_OP_addr)
56    Address(u64),
57
58    /// Register-based address with optional offset (DW_OP_breg*)
59    /// The register value will be read from pt_regs in eBPF
60    RegisterAddress {
61        register: u16, // DWARF register number
62        offset: Option<i64>,
63        size: Option<u64>, // Size hint for memory read
64    },
65
66    /// Complex computed address from multi-step expression
67    /// Will be evaluated step by step in eBPF
68    ComputedLocation {
69        /// Expression that computes the final address
70        steps: Vec<ComputeStep>,
71    },
72}
73
74/// CFA (Canonical Frame Address) computation for stack variables
75#[derive(Debug, Clone, PartialEq)]
76pub enum CfaResult {
77    /// CFA = register + offset (most common case)
78    RegisterPlusOffset {
79        register: u16, // Typically RSP or RBP
80        offset: i64,
81    },
82    /// CFA computed by DWARF expression
83    Expression { steps: Vec<ComputeStep> },
84}
85
86/// Piece of a composite location
87#[derive(Debug, Clone, PartialEq)]
88pub struct PieceResult {
89    /// Location of this piece
90    pub location: EvaluationResult,
91    /// Size in bytes
92    pub size: u64,
93    /// Bit offset within the piece (for bit fields)
94    pub bit_offset: Option<u64>,
95}
96
97/// Computation step for LLVM IR generation
98/// These map directly to LLVM IR operations that can be generated in eBPF
99#[derive(Debug, Clone, PartialEq)]
100pub enum ComputeStep {
101    /// Load register value from pt_regs
102    LoadRegister(u16), // DWARF register number
103
104    /// Push constant
105    PushConstant(i64),
106
107    /// Memory dereference via bpf_probe_read_user
108    Dereference {
109        size: MemoryAccessSize,
110    },
111
112    /// Binary arithmetic operations (pop 2, push 1)
113    Add,
114    Sub,
115    Mul,
116    Div,
117    Mod,
118
119    /// Binary bitwise operations
120    And,
121    Or,
122    Xor,
123    Shl,
124    Shr,
125    Shra, // Arithmetic shift right
126
127    /// Unary operations
128    Not,
129    Neg,
130    Abs,
131
132    /// Stack manipulation
133    Dup,
134    Drop,
135    Swap,
136    Rot,
137    Pick(u8), // Pick nth item from stack
138
139    /// Comparison operations (pop 2, push bool)
140    Eq,
141    Ne,
142    Lt,
143    Le,
144    Gt,
145    Ge,
146
147    /// Control flow (simplified for eBPF)
148    If {
149        then_branch: Vec<ComputeStep>,
150        else_branch: Vec<ComputeStep>,
151    },
152}
153
/// Width of a single memory access performed through `bpf_probe_read_user`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MemoryAccessSize {
    /// 1 byte
    U8,
    /// 2 bytes
    U16,
    /// 4 bytes
    U32,
    /// 8 bytes
    U64,
}

impl MemoryAccessSize {
    /// Returns the width of this access in bytes (1, 2, 4 or 8).
    pub fn bytes(&self) -> usize {
        match self {
            Self::U8 => 1,
            Self::U16 => 2,
            Self::U32 => 4,
            Self::U64 => 8,
        }
    }

    /// Maps a byte count to an access width.
    ///
    /// The exact widths 1, 2, 4 and 8 map to their matching variant. Every
    /// other size (zero, in-between sizes such as 3 or 7, and anything larger
    /// than 8) falls back to [`MemoryAccessSize::U64`].
    pub fn from_size(size: u64) -> Self {
        match size {
            1 => Self::U8,
            2 => Self::U16,
            4 => Self::U32,
            // 8 and every non-exact size share the widest access as fallback.
            _ => Self::U64,
        }
    }
}
186
187impl EvaluationResult {
188    /// Check if this is a simple constant
189    pub fn as_constant(&self) -> Option<i64> {
190        match self {
191            EvaluationResult::DirectValue(DirectValueResult::Constant(c)) => Some(*c),
192            _ => None,
193        }
194    }
195
196    /// Merge with CFA result for frame-relative addresses (DW_OP_fbreg)
197    /// This is used when a variable location is relative to the frame base
198    pub fn merge_with_cfa(self, cfa: CfaResult, frame_offset: i64) -> Self {
199        match cfa {
200            CfaResult::RegisterPlusOffset { register, offset } => {
201                // CFA gives us the frame base, add the frame_offset to get final location
202                EvaluationResult::MemoryLocation(LocationResult::RegisterAddress {
203                    register,
204                    offset: Some(offset + frame_offset),
205                    size: None,
206                })
207            }
208            CfaResult::Expression { mut steps } => {
209                // Add frame offset to the CFA computation
210                steps.push(ComputeStep::PushConstant(frame_offset));
211                steps.push(ComputeStep::Add);
212                EvaluationResult::MemoryLocation(LocationResult::ComputedLocation { steps })
213            }
214        }
215    }
216}
217
218impl DirectValueResult {
219    /// Check if this is a simple value that can be computed at compile time
220    pub fn is_compile_time_constant(&self) -> bool {
221        matches!(
222            self,
223            DirectValueResult::Constant(_) | DirectValueResult::ImplicitValue(_)
224        )
225    }
226
227    /// Convert compute steps to a human-readable expression
228    fn steps_to_expression(steps: &[ComputeStep]) -> String {
229        use ghostscope_platform::register_mapping::dwarf_reg_to_name;
230
231        // Stack for expression building
232        let mut stack: Vec<String> = Vec::new();
233
234        for step in steps {
235            match step {
236                ComputeStep::LoadRegister(r) => {
237                    let reg_name = dwarf_reg_to_name(*r).unwrap_or("r?").to_string();
238                    stack.push(reg_name);
239                }
240                ComputeStep::PushConstant(v) => {
241                    if *v >= 0 && *v <= 0xFF {
242                        stack.push(format!("{v}"));
243                    } else {
244                        stack.push(format!("0x{v:x}"));
245                    }
246                }
247                ComputeStep::Add => {
248                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
249                        // Special case: register + small offset
250                        if a.chars()
251                            .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit())
252                            && b.parse::<i64>().is_ok()
253                            && b.parse::<i64>().unwrap().abs() < 1000
254                        {
255                            stack.push(format!("{a}+{b}"));
256                        } else {
257                            stack.push(format!("({a}+{b})"));
258                        }
259                    } else {
260                        stack.push("?+?".to_string());
261                    }
262                }
263                ComputeStep::Sub => {
264                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
265                        stack.push(format!("({a}-{b})"));
266                    } else {
267                        stack.push("?-?".to_string());
268                    }
269                }
270                ComputeStep::Mul => {
271                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
272                        stack.push(format!("{a}*{b}"));
273                    } else {
274                        stack.push("?*?".to_string());
275                    }
276                }
277                ComputeStep::Div => {
278                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
279                        stack.push(format!("({a}/{b})"));
280                    } else {
281                        stack.push("?/?".to_string());
282                    }
283                }
284                ComputeStep::Mod => {
285                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
286                        stack.push(format!("({a}%{b})"));
287                    } else {
288                        stack.push("?%?".to_string());
289                    }
290                }
291                ComputeStep::And => {
292                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
293                        stack.push(format!("({a}&{b})"));
294                    } else {
295                        stack.push("?&?".to_string());
296                    }
297                }
298                ComputeStep::Or => {
299                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
300                        stack.push(format!("({a}|{b})"));
301                    } else {
302                        stack.push("?|?".to_string());
303                    }
304                }
305                ComputeStep::Xor => {
306                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
307                        stack.push(format!("({a}^{b})"));
308                    } else {
309                        stack.push("?^?".to_string());
310                    }
311                }
312                ComputeStep::Shl => {
313                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
314                        stack.push(format!("({a}<<{b})"));
315                    } else {
316                        stack.push("?<<?".to_string());
317                    }
318                }
319                ComputeStep::Shr => {
320                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
321                        stack.push(format!("({a}>>{b})"));
322                    } else {
323                        stack.push("?>>?".to_string());
324                    }
325                }
326                ComputeStep::Shra => {
327                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
328                        stack.push(format!("({a}>>>{b})"));
329                    } else {
330                        stack.push("?>>>?".to_string());
331                    }
332                }
333                ComputeStep::Not => {
334                    if let Some(a) = stack.pop() {
335                        stack.push(format!("~{a}"));
336                    } else {
337                        stack.push("~?".to_string());
338                    }
339                }
340                ComputeStep::Neg => {
341                    if let Some(a) = stack.pop() {
342                        stack.push(format!("-{a}"));
343                    } else {
344                        stack.push("-?".to_string());
345                    }
346                }
347                ComputeStep::Abs => {
348                    if let Some(a) = stack.pop() {
349                        stack.push(format!("|{a}|"));
350                    } else {
351                        stack.push("|?|".to_string());
352                    }
353                }
354                ComputeStep::Dereference { size } => {
355                    if let Some(a) = stack.pop() {
356                        stack.push(format!("*({a} as {size})"));
357                    } else {
358                        stack.push(format!("*(? as {size})"));
359                    }
360                }
361                ComputeStep::Dup => {
362                    if let Some(top) = stack.last() {
363                        stack.push(top.clone());
364                    }
365                }
366                ComputeStep::Drop => {
367                    stack.pop();
368                }
369                ComputeStep::Swap => {
370                    if stack.len() >= 2 {
371                        let len = stack.len();
372                        stack.swap(len - 1, len - 2);
373                    }
374                }
375                ComputeStep::Rot => {
376                    if stack.len() >= 3 {
377                        let len = stack.len();
378                        let third = stack.remove(len - 3);
379                        stack.push(third);
380                    }
381                }
382                ComputeStep::Pick(n) => {
383                    if stack.len() > *n as usize {
384                        let idx = stack.len() - 1 - (*n as usize);
385                        let val = stack[idx].clone();
386                        stack.push(val);
387                    } else {
388                        stack.push("?".to_string());
389                    }
390                }
391                ComputeStep::Eq => {
392                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
393                        stack.push(format!("({a}=={b})"));
394                    } else {
395                        stack.push("?==?".to_string());
396                    }
397                }
398                ComputeStep::Ne => {
399                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
400                        stack.push(format!("({a}!={b})"));
401                    } else {
402                        stack.push("?!=?".to_string());
403                    }
404                }
405                ComputeStep::Lt => {
406                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
407                        stack.push(format!("({a}<{b})"));
408                    } else {
409                        stack.push("?<?".to_string());
410                    }
411                }
412                ComputeStep::Le => {
413                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
414                        stack.push(format!("({a}<={b})"));
415                    } else {
416                        stack.push("?<=?".to_string());
417                    }
418                }
419                ComputeStep::Gt => {
420                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
421                        stack.push(format!("({a}>{b})"));
422                    } else {
423                        stack.push("?>?".to_string());
424                    }
425                }
426                ComputeStep::Ge => {
427                    if let (Some(b), Some(a)) = (stack.pop(), stack.pop()) {
428                        stack.push(format!("({a}>={b})"));
429                    } else {
430                        stack.push("?>=?".to_string());
431                    }
432                }
433                ComputeStep::If {
434                    then_branch,
435                    else_branch,
436                } => {
437                    if let Some(cond) = stack.pop() {
438                        stack.push(format!("if {cond} then ... else ..."));
439                    } else {
440                        stack.push("if ? then ... else ...".to_string());
441                    }
442                    // Note: Full if-then-else evaluation would require recursive expression building
443                    _ = then_branch;
444                    _ = else_branch;
445                }
446            }
447        }
448
449        // Return the top of stack or a placeholder
450        stack.pop().unwrap_or_else(|| "?".to_string())
451    }
452}
453
454impl LocationResult {
455    /// Check if this is a simple location (no computation needed)
456    pub fn is_simple(&self) -> bool {
457        matches!(
458            self,
459            LocationResult::Address(_) | LocationResult::RegisterAddress { .. }
460        )
461    }
462
463    /// Convert compute steps to a human-readable expression (reuse from DirectValueResult)
464    fn steps_to_expression(steps: &[ComputeStep]) -> String {
465        DirectValueResult::steps_to_expression(steps)
466    }
467}
468
469impl fmt::Display for EvaluationResult {
470    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
471        match self {
472            EvaluationResult::DirectValue(dv) => write!(f, "[DirectValue] {dv}"),
473            EvaluationResult::MemoryLocation(loc) => write!(f, "[Memory] {loc}"),
474            EvaluationResult::Optimized => write!(f, "<optimized out>"),
475            EvaluationResult::Composite(pieces) => {
476                write!(f, "Composite[{} pieces]", pieces.len())
477            }
478        }
479    }
480}
481
482impl fmt::Display for DirectValueResult {
483    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
484        use ghostscope_platform::register_mapping::dwarf_reg_to_name;
485
486        match self {
487            DirectValueResult::Constant(c) => {
488                if *c >= 0 && *c <= 0xFF {
489                    write!(f, "{c} (0x{c:x})")
490                } else {
491                    write!(f, "0x{c:x}")
492                }
493            }
494            DirectValueResult::RegisterValue(r) => {
495                if let Some(name) = dwarf_reg_to_name(*r) {
496                    write!(f, "{name}")
497                } else {
498                    write!(f, "r{r}")
499                }
500            }
501            DirectValueResult::ImplicitValue(bytes) => {
502                if bytes.len() <= 8 {
503                    write!(f, "implicit[")?;
504                    for (i, b) in bytes.iter().enumerate() {
505                        if i > 0 {
506                            write!(f, " ")?;
507                        }
508                        write!(f, "{b:02x}")?;
509                    }
510                    write!(f, "]")
511                } else {
512                    write!(f, "implicit[{} bytes]", bytes.len())
513                }
514            }
515            DirectValueResult::ComputedValue {
516                steps,
517                result_size: _,
518            } => {
519                // Convert compute steps to a readable expression
520                write!(f, "=")?;
521
522                // Simple expression builder for common patterns
523                let expr = Self::steps_to_expression(steps);
524                write!(f, "{expr}")
525            }
526        }
527    }
528}
529
530impl fmt::Display for LocationResult {
531    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
532        use ghostscope_platform::register_mapping::dwarf_reg_to_name;
533
534        match self {
535            LocationResult::Address(addr) => write!(f, "@0x{addr:x}"),
536            LocationResult::RegisterAddress {
537                register,
538                offset,
539                size,
540            } => {
541                let reg_name = dwarf_reg_to_name(*register).unwrap_or("r?");
542
543                match (offset, size) {
544                    (Some(o), Some(s)) => {
545                        let offset = *o;
546                        if offset >= 0 {
547                            write!(f, "@[{reg_name}+{offset}]:{s}")
548                        } else {
549                            let neg = -offset;
550                            write!(f, "@[{reg_name}-{neg}]:{s}")
551                        }
552                    }
553                    (Some(o), None) => {
554                        let offset = *o;
555                        if offset >= 0 {
556                            write!(f, "@[{reg_name}+{offset}]")
557                        } else {
558                            let neg = -offset;
559                            write!(f, "@[{reg_name}-{neg}]")
560                        }
561                    }
562                    (None, Some(s)) => write!(f, "@[{reg_name}]:{s}"),
563                    (None, None) => write!(f, "@[{reg_name}]"),
564                }
565            }
566            LocationResult::ComputedLocation { steps } => {
567                // Convert compute steps to a readable expression for the address
568                write!(f, "@[")?;
569                let expr = Self::steps_to_expression(steps);
570                write!(f, "{expr}]")
571            }
572        }
573    }
574}
575
576impl fmt::Display for MemoryAccessSize {
577    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
578        match self {
579            MemoryAccessSize::U8 => write!(f, "u8"),
580            MemoryAccessSize::U16 => write!(f, "u16"),
581            MemoryAccessSize::U32 => write!(f, "u32"),
582            MemoryAccessSize::U64 => write!(f, "u64"),
583        }
584    }
585}
586
587impl fmt::Display for ComputeStep {
588    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
589        use ghostscope_platform::register_mapping::dwarf_reg_to_name;
590
591        match self {
592            ComputeStep::LoadRegister(r) => {
593                if let Some(name) = dwarf_reg_to_name(*r) {
594                    write!(f, "load {name}")
595                } else {
596                    write!(f, "load r{r}")
597                }
598            }
599            ComputeStep::PushConstant(v) => write!(f, "push {v}"),
600            ComputeStep::Dereference { size } => write!(f, "deref {size}"),
601            ComputeStep::Add => write!(f, "add"),
602            ComputeStep::Sub => write!(f, "sub"),
603            ComputeStep::Mul => write!(f, "mul"),
604            ComputeStep::Div => write!(f, "div"),
605            ComputeStep::Mod => write!(f, "mod"),
606            ComputeStep::And => write!(f, "and"),
607            ComputeStep::Or => write!(f, "or"),
608            ComputeStep::Xor => write!(f, "xor"),
609            ComputeStep::Shl => write!(f, "shl"),
610            ComputeStep::Shr => write!(f, "shr"),
611            ComputeStep::Shra => write!(f, "shra"),
612            ComputeStep::Not => write!(f, "not"),
613            ComputeStep::Neg => write!(f, "neg"),
614            ComputeStep::Abs => write!(f, "abs"),
615            ComputeStep::Dup => write!(f, "dup"),
616            ComputeStep::Drop => write!(f, "drop"),
617            ComputeStep::Swap => write!(f, "swap"),
618            ComputeStep::Rot => write!(f, "rot"),
619            ComputeStep::Pick(n) => write!(f, "pick {n}"),
620            ComputeStep::Eq => write!(f, "eq"),
621            ComputeStep::Ne => write!(f, "ne"),
622            ComputeStep::Lt => write!(f, "lt"),
623            ComputeStep::Le => write!(f, "le"),
624            ComputeStep::Gt => write!(f, "gt"),
625            ComputeStep::Ge => write!(f, "ge"),
626            ComputeStep::If {
627                then_branch,
628                else_branch,
629            } => {
630                write!(
631                    f,
632                    "if[then:{} else:{}]",
633                    then_branch.len(),
634                    else_branch.len()
635                )
636            }
637        }
638    }
639}