decy_parser/
parser.rs

1//! C parser implementation using clang-sys.
2//!
3//! This module provides the core parsing functionality to convert C source code
4//! into an AST representation using LLVM/Clang bindings.
5
6use anyhow::{Context, Result};
7use clang_sys::*;
8use std::ffi::{CStr, CString};
9use std::path::Path;
10use std::process::Command;
11use std::ptr;
12
/// Discover system include paths from the clang compiler.
///
/// Runs `clang -E -x c - -v` with a null stdin and scans the verbose output on
/// stderr for the `#include <...>` search list. Entries marked as framework
/// directories (macOS-specific) are skipped. When no path is found this way
/// (for example because `clang` could not be launched), a short list of
/// conventional per-platform directories is returned instead.
fn discover_system_includes() -> Vec<String> {
    const SEARCH_LIST_START: &str = "#include <...> search starts here:";
    const SEARCH_LIST_END: &str = "End of search list.";
    const FRAMEWORK_MARKER: &str = "(framework directory)";

    let mut includes: Vec<String> = Vec::new();

    let probe = Command::new("clang")
        .args(["-E", "-x", "c", "-", "-v"])
        .stdin(std::process::Stdio::null())
        .output();

    // A failed launch is not an error here; the fallback below takes over.
    if let Ok(probe) = probe {
        let verbose = String::from_utf8_lossy(&probe.stderr);
        let mut inside_search_list = false;

        for raw_line in verbose.lines() {
            if raw_line.contains(SEARCH_LIST_START) {
                // The header line itself is not a path.
                inside_search_list = true;
            } else if raw_line.contains(SEARCH_LIST_END) {
                break;
            } else if inside_search_list {
                let candidate = raw_line.trim();
                let is_framework = candidate.contains(FRAMEWORK_MARKER);
                if !(candidate.is_empty() || is_framework) {
                    includes.push(candidate.to_owned());
                }
            }
        }
    }

    // Nothing detected: fall back to well-known locations for the target OS.
    if includes.is_empty() {
        if cfg!(target_os = "macos") {
            includes.extend(
                [
                    "/usr/local/include",
                    "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include",
                ]
                .iter()
                .map(|dir| dir.to_string()),
            );
        }
        if cfg!(target_os = "linux") {
            includes.extend(
                ["/usr/include", "/usr/local/include"]
                    .iter()
                    .map(|dir| dir.to_string()),
            );
        }
    }

    includes
}
68
/// C parser using clang-sys.
///
/// Holds a libclang index handle together with the system include paths that
/// are handed to clang on every `parse` call. The index is released when the
/// parser is dropped.
///
/// # Examples
///
/// ```no_run
/// use decy_parser::parser::CParser;
///
/// let parser = CParser::new()?;
/// let ast = parser.parse("int main() { return 0; }")?;
/// assert_eq!(ast.functions().len(), 1);
/// # Ok::<(), anyhow::Error>(())
/// ```
#[derive(Debug)]
pub struct CParser {
    /// libclang index handle obtained from `clang_createIndex`
    index: CXIndex,
    /// System include paths discovered from the compiler
    system_includes: Vec<String>,
}
87
88impl CParser {
89    /// Create a new C parser.
90    ///
91    /// This automatically discovers system include paths from the clang compiler.
92    ///
93    /// # Examples
94    ///
95    /// ```no_run
96    /// use decy_parser::parser::CParser;
97    ///
98    /// let parser = CParser::new()?;
99    /// # Ok::<(), anyhow::Error>(())
100    /// ```
101    pub fn new() -> Result<Self> {
102        // SAFETY: clang_createIndex is safe to call with these parameters
103        let index = unsafe { clang_createIndex(0, 0) };
104        if index.is_null() {
105            anyhow::bail!("Failed to create clang index");
106        }
107
108        // Discover system include paths for standard header support
109        let system_includes = discover_system_includes();
110
111        Ok(Self {
112            index,
113            system_includes,
114        })
115    }
116
117    /// Parse C source code into an AST.
118    ///
119    /// # Arguments
120    ///
121    /// * `source` - The C source code to parse
122    ///
123    /// # Returns
124    ///
125    /// * `Ok(Ast)` - The parsed AST
126    /// * `Err(anyhow::Error)` - If parsing fails
127    ///
128    /// # Examples
129    ///
130    /// ```no_run
131    /// use decy_parser::parser::CParser;
132    ///
133    /// let parser = CParser::new()?;
134    /// let ast = parser.parse("int add(int a, int b) { return a + b; }")?;
135    /// # Ok::<(), anyhow::Error>(())
136    /// ```
137    pub fn parse(&self, source: &str) -> Result<Ast> {
138        let filename = CString::new("input.c").context("Failed to create filename")?;
139        let source_cstr = CString::new(source).context("Failed to convert source to CString")?;
140
141        let mut ast = Ast::new();
142
143        // Handle empty input
144        if source.trim().is_empty() {
145            return Ok(ast);
146        }
147
148        // SAFETY: Creating unsaved file with valid C strings
149        let unsaved_file = CXUnsavedFile {
150            Filename: filename.as_ptr(),
151            Contents: source_cstr.as_ptr(),
152            Length: source.len() as std::os::raw::c_ulong,
153        };
154
155        // Detect if source contains BARE extern "C" (without #ifdef guards)
156        // If it has #ifdef __cplusplus guards, clang can handle it as C
157        // Only enable C++ mode for bare extern "C" blocks
158        let has_extern_c = source.contains("extern \"C\"");
159        let has_ifdef_guard =
160            source.contains("#ifdef __cplusplus") || source.contains("#if defined(__cplusplus)");
161        let needs_cpp_mode = has_extern_c && !has_ifdef_guard;
162
163        // Build system include path arguments
164        // We need to keep CStrings alive for the duration of parsing
165        let isystem_flag = CString::new("-isystem").unwrap();
166        let include_cstrings: Vec<CString> = self
167            .system_includes
168            .iter()
169            .map(|p| CString::new(p.as_str()).unwrap())
170            .collect();
171
172        // Prepare command line arguments for C++ mode if needed
173        let cpp_flag = CString::new("-x").unwrap();
174        let cpp_lang = CString::new("c++").unwrap();
175
176        // DECY-194: Add standard C macro definitions that might not be available
177        // EOF is defined as -1 in stdio.h, NULL as 0
178        let define_eof = CString::new("-DEOF=-1").unwrap();
179        let define_null = CString::new("-DNULL=0").unwrap();
180        // BUFSIZ from stdio.h (typical value)
181        let define_bufsiz = CString::new("-DBUFSIZ=8192").unwrap();
182
183        // Build the complete args vector
184        let mut args_vec: Vec<*const std::os::raw::c_char> = Vec::new();
185
186        // Add C++ mode flags if needed
187        if needs_cpp_mode {
188            args_vec.push(cpp_flag.as_ptr());
189            args_vec.push(cpp_lang.as_ptr());
190        }
191
192        // Add macro definitions
193        args_vec.push(define_eof.as_ptr());
194        args_vec.push(define_null.as_ptr());
195        args_vec.push(define_bufsiz.as_ptr());
196
197        // Add system include paths
198        for include_path in &include_cstrings {
199            args_vec.push(isystem_flag.as_ptr());
200            args_vec.push(include_path.as_ptr());
201        }
202
203        // SAFETY: Parsing with clang_parseTranslationUnit2
204        // Enable DetailedPreprocessingRecord to capture macro definitions
205        // CXTranslationUnit_DetailedPreprocessingRecord = 1
206        let flags = 1;
207
208        let mut tu = ptr::null_mut();
209        let result = unsafe {
210            clang_parseTranslationUnit2(
211                self.index,
212                filename.as_ptr(),
213                if args_vec.is_empty() {
214                    ptr::null()
215                } else {
216                    args_vec.as_ptr()
217                },
218                args_vec.len() as std::os::raw::c_int,
219                &unsaved_file as *const CXUnsavedFile as *mut CXUnsavedFile,
220                1,
221                flags,
222                &mut tu,
223            )
224        };
225
226        if result != CXError_Success || tu.is_null() {
227            anyhow::bail!("Failed to parse C source");
228        }
229
230        // SAFETY: Check for diagnostics (errors/warnings)
231        let num_diagnostics = unsafe { clang_getNumDiagnostics(tu) };
232        for i in 0..num_diagnostics {
233            let diag = unsafe { clang_getDiagnostic(tu, i) };
234            let severity = unsafe { clang_getDiagnosticSeverity(diag) };
235
236            // If we have errors, fail the parse
237            if severity >= CXDiagnostic_Error {
238                unsafe { clang_disposeDiagnostic(diag) };
239                unsafe { clang_disposeTranslationUnit(tu) };
240                anyhow::bail!("C source has syntax errors");
241            }
242
243            unsafe { clang_disposeDiagnostic(diag) };
244        }
245
246        // SAFETY: Getting cursor from valid translation unit
247        let cursor = unsafe { clang_getTranslationUnitCursor(tu) };
248
249        // Visit children to extract functions
250        let ast_ptr = &mut ast as *mut Ast;
251
252        // SAFETY: Visiting cursor children with callback
253        unsafe {
254            clang_visitChildren(cursor, visit_function, ast_ptr as CXClientData);
255
256            // Clean up
257            clang_disposeTranslationUnit(tu);
258        }
259
260        Ok(ast)
261    }
262
263    /// Parse a C file into an AST.
264    ///
265    /// # Arguments
266    ///
267    /// * `path` - Path to the C file
268    ///
269    /// # Returns
270    ///
271    /// * `Ok(Ast)` - The parsed AST
272    /// * `Err(anyhow::Error)` - If parsing fails
273    pub fn parse_file(&self, _path: &Path) -> Result<Ast> {
274        // RED phase: not yet implemented
275        Err(anyhow::anyhow!("Not implemented yet"))
276    }
277}
278
279impl Drop for CParser {
280    fn drop(&mut self) {
281        // SAFETY: Disposing of valid clang index
282        unsafe {
283            clang_disposeIndex(self.index);
284        }
285    }
286}
287
288/// Visitor callback for clang AST traversal.
289///
290/// # Safety
291///
292/// This function is called by clang_visitChildren and must follow C calling conventions.
293extern "C" fn visit_function(
294    cursor: CXCursor,
295    _parent: CXCursor,
296    client_data: CXClientData,
297) -> CXChildVisitResult {
298    // SAFETY: Converting client data back to AST pointer
299    let ast = unsafe { &mut *(client_data as *mut Ast) };
300
301    // SAFETY: Getting cursor kind
302    let kind = unsafe { clang_getCursorKind(cursor) };
303
304    // Handle extern "C" linkage specifications (DECY-055)
305    // CXCursor_LinkageSpec = 23
306    if kind == 23 {
307        // This is extern "C" { ... } - visit its children
308        // Don't process the linkage spec itself, just recurse into declarations
309        unsafe {
310            clang_visitChildren(cursor, visit_function, client_data);
311        }
312        return CXChildVisit_Continue;
313    }
314
315    if kind == CXCursor_FunctionDecl {
316        // Extract function information
317        if let Some(function) = extract_function(cursor) {
318            ast.add_function(function);
319        }
320    } else if kind == CXCursor_TypedefDecl {
321        // Extract typedef information
322        // DECY-147: Handle anonymous struct typedefs
323        let (typedef_opt, struct_opt) = extract_typedef(cursor);
324        if let Some(typedef) = typedef_opt {
325            ast.add_typedef(typedef);
326        }
327        if let Some(struct_def) = struct_opt {
328            ast.add_struct(struct_def);
329        }
330    } else if kind == CXCursor_StructDecl {
331        // Extract struct information
332        if let Some(struct_def) = extract_struct(cursor) {
333            ast.add_struct(struct_def);
334        }
335    } else if kind == CXCursor_VarDecl {
336        // Extract variable declaration - only add if it's at file scope (global)
337        // Check if parent is translation unit (file scope) vs function scope
338        let semantic_parent = unsafe { clang_getCursorSemanticParent(cursor) };
339        let parent_kind = unsafe { clang_getCursorKind(semantic_parent) };
340
341        // Check if parent is file scope: either TranslationUnit or nullptr
342        // Function declarations have parent kind = CXCursor_FunctionDecl (8)
343        // File-scope variables typically have parent kind = CXCursor_TranslationUnit (300 in clang-sys)
344        let is_file_scope = parent_kind != CXCursor_FunctionDecl;
345
346        if is_file_scope {
347            if let Some(variable) = extract_variable(cursor) {
348                ast.add_variable(variable);
349            }
350        }
351        // Local variables in functions are handled by extract_statement in function body parsing
352    } else if kind == CXCursor_MacroDefinition {
353        // Extract macro definition (only from main file, not includes)
354        let location = unsafe { clang_getCursorLocation(cursor) };
355        let mut file: CXFile = ptr::null_mut();
356        unsafe {
357            clang_getFileLocation(
358                location,
359                &mut file,
360                ptr::null_mut(),
361                ptr::null_mut(),
362                ptr::null_mut(),
363            );
364        }
365
366        // Only process macros from the main file (not system headers)
367        if !file.is_null() {
368            let file_name = unsafe {
369                let name_cxstring = clang_getFileName(file);
370                let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
371                let name = c_str.to_string_lossy().into_owned();
372                clang_disposeString(name_cxstring);
373                name
374            };
375
376            // Only add macros from input.c (our source file)
377            if file_name.ends_with("input.c") {
378                if let Some(macro_def) = extract_macro(cursor) {
379                    ast.add_macro(macro_def);
380                }
381            }
382        }
383    }
384
385    // Return Recurse to ensure we visit children of all nodes
386    // This is needed in C++ mode to reach LinkageSpec and its children
387    CXChildVisit_Recurse
388}
389
390/// Extract function information from a clang cursor.
391fn extract_function(cursor: CXCursor) -> Option<Function> {
392    // SAFETY: Getting cursor spelling (function name)
393    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
394    let name = unsafe {
395        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
396        let name = c_str.to_string_lossy().into_owned();
397        clang_disposeString(name_cxstring);
398        name
399    };
400
401    // SAFETY: Getting return type
402    let cx_type = unsafe { clang_getCursorType(cursor) };
403    let return_cx_type = unsafe { clang_getResultType(cx_type) };
404    let return_type = convert_type(return_cx_type)?;
405
406    // Extract parameters
407    let num_args = unsafe { clang_Cursor_getNumArguments(cursor) };
408    let mut parameters = Vec::new();
409
410    for i in 0..num_args {
411        // SAFETY: Getting argument cursor
412        let arg_cursor = unsafe { clang_Cursor_getArgument(cursor, i as u32) };
413
414        // Get parameter name
415        let param_name_cxstring = unsafe { clang_getCursorSpelling(arg_cursor) };
416        let param_name = unsafe {
417            let c_str = CStr::from_ptr(clang_getCString(param_name_cxstring));
418            let name = c_str.to_string_lossy().into_owned();
419            clang_disposeString(param_name_cxstring);
420            name
421        };
422
423        // Get parameter type
424        let param_cx_type = unsafe { clang_getCursorType(arg_cursor) };
425        if let Some(param_type) = convert_type(param_cx_type) {
426            // DECY-135: Check if this is a pointer with const-qualified pointee
427            let is_pointee_const = unsafe {
428                if param_cx_type.kind == clang_sys::CXType_Pointer {
429                    let pointee = clang_sys::clang_getPointeeType(param_cx_type);
430                    clang_isConstQualifiedType(pointee) != 0
431                } else {
432                    false
433                }
434            };
435            parameters.push(Parameter::new_with_const(
436                param_name,
437                param_type,
438                is_pointee_const,
439            ));
440        }
441    }
442
443    // Extract function body by visiting children
444    let mut body = Vec::new();
445    let body_ptr = &mut body as *mut Vec<Statement>;
446
447    unsafe {
448        clang_visitChildren(cursor, visit_statement, body_ptr as CXClientData);
449    }
450
451    Some(Function::new_with_body(name, return_type, parameters, body))
452}
453
454/// Extract typedef information from a clang cursor.
455/// Returns (Option<Typedef>, Option<Struct>) - struct is Some when typedef is for anonymous struct.
456fn extract_typedef(cursor: CXCursor) -> (Option<Typedef>, Option<Struct>) {
457    // SAFETY: Getting typedef name
458    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
459    let name = unsafe {
460        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
461        let name = c_str.to_string_lossy().into_owned();
462        clang_disposeString(name_cxstring);
463        name
464    };
465
466    // SAFETY: Getting underlying type of typedef
467    let cx_type = unsafe { clang_getTypedefDeclUnderlyingType(cursor) };
468
469    // DECY-147: Check if underlying type is anonymous struct
470    // Anonymous struct pattern: typedef struct { ... } Name;
471    let canonical = unsafe { clang_getCanonicalType(cx_type) };
472    if canonical.kind == CXType_Record {
473        let decl = unsafe { clang_getTypeDeclaration(canonical) };
474        let struct_name_cxstring = unsafe { clang_getCursorSpelling(decl) };
475        let struct_name = unsafe {
476            let c_str = CStr::from_ptr(clang_getCString(struct_name_cxstring));
477            let sn = c_str.to_string_lossy().into_owned();
478            clang_disposeString(struct_name_cxstring);
479            sn
480        };
481
482        // If struct name is empty, this is an anonymous struct typedef
483        if struct_name.is_empty() {
484            // Extract struct fields from the declaration
485            let mut fields = Vec::new();
486            let fields_ptr = &mut fields as *mut Vec<StructField>;
487
488            unsafe {
489                clang_visitChildren(decl, visit_struct_fields, fields_ptr as CXClientData);
490            }
491
492            // Return struct with typedef name, no typedef needed
493            return (None, Some(Struct::new(name, fields)));
494        }
495    }
496
497    let underlying_type = convert_type(cx_type);
498    match underlying_type {
499        Some(ut) => (Some(Typedef::new(name, ut)), None),
500        None => (None, None),
501    }
502}
503
504/// Extract struct information from a clang cursor.
505fn extract_struct(cursor: CXCursor) -> Option<Struct> {
506    // SAFETY: Getting struct name
507    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
508    let name = unsafe {
509        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
510        let name = c_str.to_string_lossy().into_owned();
511        clang_disposeString(name_cxstring);
512        name
513    };
514
515    // Skip anonymous structs
516    if name.is_empty() {
517        return None;
518    }
519
520    // Extract struct fields by visiting children
521    let mut fields = Vec::new();
522    let fields_ptr = &mut fields as *mut Vec<StructField>;
523
524    unsafe {
525        clang_visitChildren(cursor, visit_struct_fields, fields_ptr as CXClientData);
526    }
527
528    Some(Struct::new(name, fields))
529}
530
531/// Extract macro definition from a clang cursor.
532///
533/// Extract variable declaration information from a clang cursor.
534///
535/// Extracts global and local variable declarations, including function pointers.
536///
537/// # Examples
538///
539/// Simple: `int x;`
540/// Function pointer: `int (*callback)(int);`
541fn extract_variable(cursor: CXCursor) -> Option<Variable> {
542    // SAFETY: Getting cursor spelling (variable name)
543    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
544    let name = unsafe {
545        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
546        let name = c_str.to_string_lossy().into_owned();
547        clang_disposeString(name_cxstring);
548        name
549    };
550
551    // SAFETY: Getting variable type
552    let cx_type = unsafe { clang_getCursorType(cursor) };
553    let var_type = convert_type(cx_type)?;
554
555    // Extract storage class specifiers
556    // CX_StorageClass values (from clang-sys):
557    // CX_SC_Invalid = 0, CX_SC_None = 1, CX_SC_Extern = 2, CX_SC_Static = 3,
558    // CX_SC_PrivateExtern = 4, CX_SC_OpenCLWorkGroupLocal = 5,
559    // CX_SC_Auto = 6, CX_SC_Register = 7
560    let storage_class = unsafe { clang_Cursor_getStorageClass(cursor) };
561    let is_static = storage_class == 3; // CX_SC_Static
562    let is_extern = storage_class == 2; // CX_SC_Extern
563
564    // Check if type is const-qualified
565    let is_const = unsafe { clang_isConstQualifiedType(cx_type) != 0 };
566
567    // Extract initializer by visiting children
568    let mut initializer: Option<Expression> = None;
569    let initializer_ptr = &mut initializer as *mut Option<Expression>;
570
571    unsafe {
572        clang_visitChildren(
573            cursor,
574            visit_variable_initializer,
575            initializer_ptr as CXClientData,
576        );
577    }
578
579    Some(Variable::new_with_storage_class(
580        name,
581        var_type,
582        initializer,
583        is_static,
584        is_extern,
585        is_const,
586    ))
587}
588
589/// Helper function to extract an expression from a cursor.
590/// Dispatches to the appropriate extract function based on cursor kind.
591#[allow(non_upper_case_globals)]
592fn try_extract_expression(cursor: CXCursor) -> Option<Expression> {
593    let kind = unsafe { clang_getCursorKind(cursor) };
594
595    match kind {
596        CXCursor_IntegerLiteral => extract_int_literal(cursor),
597        107 => extract_float_literal(cursor), // CXCursor_FloatingLiteral
598        CXCursor_StringLiteral => extract_string_literal(cursor),
599        110 => extract_char_literal(cursor), // CXCursor_CharacterLiteral
600        CXCursor_DeclRefExpr => extract_variable_ref(cursor),
601        CXCursor_BinaryOperator => extract_binary_op(cursor),
602        CXCursor_CallExpr => extract_function_call(cursor),
603        CXCursor_UnaryOperator => extract_unary_op(cursor),
604        CXCursor_ArraySubscriptExpr => extract_array_index(cursor),
605        CXCursor_MemberRefExpr => extract_field_access(cursor),
606        116 => extract_conditional_op(cursor), // CXCursor_ConditionalOperator (ternary)
607        117 => extract_cast(cursor), // CXCursor_CStyleCastExpr
608        118 => extract_compound_literal(cursor), // CXCursor_CompoundLiteralExpr
609        111 => {
610            // CXCursor_ParenExpr - parenthesized expression like (a > b)
611            // Recurse into children to extract the inner expression
612            let mut result: Option<Expression> = None;
613            let result_ptr = &mut result as *mut Option<Expression>;
614            unsafe {
615                clang_visitChildren(
616                    cursor,
617                    visit_variable_initializer,
618                    result_ptr as CXClientData,
619                );
620            }
621            result
622        }
623        CXCursor_UnexposedExpr => {
624            // UnexposedExpr is a wrapper - recurse into children
625            let mut result: Option<Expression> = None;
626            let result_ptr = &mut result as *mut Option<Expression>;
627            unsafe {
628                clang_visitChildren(
629                    cursor,
630                    visit_variable_initializer,
631                    result_ptr as CXClientData,
632                );
633            }
634            result
635        }
636        _ => None,
637    }
638}
639
640/// Visitor callback for variable initializer expressions.
641#[allow(non_upper_case_globals)]
642extern "C" fn visit_variable_initializer(
643    cursor: CXCursor,
644    _parent: CXCursor,
645    client_data: CXClientData,
646) -> CXChildVisitResult {
647    let initializer = unsafe { &mut *(client_data as *mut Option<Expression>) };
648
649    // Extract the first expression found (the initializer)
650    if let Some(expr) = try_extract_expression(cursor) {
651        *initializer = Some(expr);
652        return CXChildVisit_Break;
653    }
654
655    CXChildVisit_Continue
656}
657
658/// This function extracts #define directives, supporting both object-like and function-like macros.
659///
660/// # Examples
661///
662/// Object-like: `#define MAX 100`
663/// Function-like: `#define SQR(x) ((x) * (x))`
664fn extract_macro(cursor: CXCursor) -> Option<MacroDefinition> {
665    // SAFETY: Getting macro name
666    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
667    let name = unsafe {
668        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
669        let name = c_str.to_string_lossy().into_owned();
670        clang_disposeString(name_cxstring);
671        name
672    };
673
674    // Skip empty macro names
675    if name.is_empty() {
676        return None;
677    }
678
679    // Get macro body using clang_Cursor_isMacroFunctionLike and clang token APIs
680    // For now, we'll check if it's function-like and extract tokens
681    let is_function_like = unsafe { clang_sys::clang_Cursor_isMacroFunctionLike(cursor) } != 0;
682
683    // Get the source range and tokens for the macro
684    let range = unsafe { clang_getCursorExtent(cursor) };
685    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
686
687    let mut tokens: *mut CXToken = ptr::null_mut();
688    let mut num_tokens: u32 = 0;
689
690    unsafe {
691        clang_tokenize(tu, range, &mut tokens, &mut num_tokens);
692    }
693
694    // Extract macro body from tokens
695    // Skip the first token (macro name) and extract the rest
696    let mut parameters = Vec::new();
697    let mut body_tokens = Vec::new();
698    let mut in_params = false;
699
700    for i in 0..num_tokens {
701        let token = unsafe { *tokens.offset(i as isize) };
702        let token_kind = unsafe { clang_getTokenKind(token) };
703        let token_spelling = unsafe { clang_getTokenSpelling(tu, token) };
704        let token_str = unsafe {
705            let c_str = CStr::from_ptr(clang_getCString(token_spelling));
706            let s = c_str.to_string_lossy().into_owned();
707            clang_disposeString(token_spelling);
708            s
709        };
710
711        // Skip the macro name (first token)
712        if i == 0 {
713            continue;
714        }
715
716        // Check for parameter list (function-like macros)
717        if is_function_like && i == 1 && token_str == "(" {
718            in_params = true;
719            continue;
720        }
721
722        if in_params {
723            if token_str == ")" {
724                in_params = false;
725                continue;
726            } else if token_str != ","
727                && (token_kind == CXToken_Identifier || token_kind == CXToken_Keyword)
728            {
729                // Accept both identifiers and keywords as parameter names
730                // C allows keywords in macro parameter names since they're in macro scope
731                parameters.push(token_str);
732            }
733        } else {
734            body_tokens.push(token_str);
735        }
736    }
737
738    // Clean up tokens
739    unsafe {
740        clang_disposeTokens(tu, tokens, num_tokens);
741    }
742
743    // Join body tokens without spaces (preserving original formatting)
744    let body = body_tokens.join("");
745
746    if is_function_like {
747        Some(MacroDefinition::new_function_like(name, parameters, body))
748    } else {
749        Some(MacroDefinition::new_object_like(name, body))
750    }
751}
752
753/// Visitor callback for struct fields.
754///
755/// # Safety
756///
757/// This function is called by clang_visitChildren and must follow C calling conventions.
758#[allow(non_upper_case_globals)]
759extern "C" fn visit_struct_fields(
760    cursor: CXCursor,
761    _parent: CXCursor,
762    client_data: CXClientData,
763) -> CXChildVisitResult {
764    // SAFETY: Converting client data back to fields vector pointer
765    let fields = unsafe { &mut *(client_data as *mut Vec<StructField>) };
766
767    // SAFETY: Getting cursor kind
768    let kind = unsafe { clang_getCursorKind(cursor) };
769
770    if kind == CXCursor_FieldDecl {
771        // Get field name
772        let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
773        let name = unsafe {
774            let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
775            let name = c_str.to_string_lossy().into_owned();
776            clang_disposeString(name_cxstring);
777            name
778        };
779
780        // Get field type
781        let cx_type = unsafe { clang_getCursorType(cursor) };
782        if let Some(field_type) = convert_type(cx_type) {
783            fields.push(StructField::new(name, field_type));
784        }
785    }
786
787    CXChildVisit_Continue
788}
789
790/// Visitor callback for extracting statements from function body.
791///
792/// # Safety
793///
794/// This function is called by clang_visitChildren and must follow C calling conventions.
795#[allow(non_upper_case_globals)]
796extern "C" fn visit_statement(
797    cursor: CXCursor,
798    _parent: CXCursor,
799    client_data: CXClientData,
800) -> CXChildVisitResult {
801    // SAFETY: Converting client data back to statement vector pointer
802    let statements = unsafe { &mut *(client_data as *mut Vec<Statement>) };
803
804    // SAFETY: Getting cursor kind
805    let kind = unsafe { clang_getCursorKind(cursor) };
806
807    match kind {
808        CXCursor_CompoundStmt => {
809            // Compound statement (function body) - recurse into it
810            CXChildVisit_Recurse
811        }
812        CXCursor_DeclStmt => {
813            // Declaration statement - visit its children to get the actual declaration
814            CXChildVisit_Recurse
815        }
816        CXCursor_VarDecl => {
817            // Variable declaration
818            if let Some(stmt) = extract_var_decl(cursor) {
819                statements.push(stmt);
820            }
821            CXChildVisit_Continue
822        }
823        CXCursor_ReturnStmt => {
824            // Return statement
825            if let Some(stmt) = extract_return_stmt(cursor) {
826                statements.push(stmt);
827            }
828            CXChildVisit_Continue
829        }
830        CXCursor_BinaryOperator => {
831            // Could be an assignment statement (x = 42)
832            if let Some(stmt) = extract_assignment_stmt(cursor) {
833                statements.push(stmt);
834            }
835            CXChildVisit_Continue
836        }
837        CXCursor_IfStmt => {
838            // If statement
839            if let Some(stmt) = extract_if_stmt(cursor) {
840                statements.push(stmt);
841            }
842            CXChildVisit_Continue
843        }
844        CXCursor_ForStmt => {
845            // For loop
846            if let Some(stmt) = extract_for_stmt(cursor) {
847                statements.push(stmt);
848            }
849            CXChildVisit_Continue
850        }
851        CXCursor_WhileStmt => {
852            // While loop
853            if let Some(stmt) = extract_while_stmt(cursor) {
854                statements.push(stmt);
855            }
856            CXChildVisit_Continue
857        }
858        CXCursor_SwitchStmt => {
859            // Switch statement
860            if let Some(stmt) = extract_switch_stmt(cursor) {
861                statements.push(stmt);
862            }
863            CXChildVisit_Continue
864        }
865        CXCursor_BreakStmt => {
866            // Break statement
867            statements.push(Statement::Break);
868            CXChildVisit_Continue
869        }
870        CXCursor_ContinueStmt => {
871            // Continue statement
872            statements.push(Statement::Continue);
873            CXChildVisit_Continue
874        }
875        CXCursor_UnaryOperator => {
876            // Could be ++/-- statement (ptr++, ++ptr, ptr--, --ptr)
877            if let Some(stmt) = extract_inc_dec_stmt(cursor) {
878                statements.push(stmt);
879            }
880            CXChildVisit_Continue
881        }
882        CXCursor_CompoundAssignOperator => {
883            // Compound assignment (+=, -=, *=, /=, %=)
884            if let Some(stmt) = extract_compound_assignment_stmt(cursor) {
885                statements.push(stmt);
886            }
887            CXChildVisit_Continue
888        }
889        CXCursor_CallExpr => {
890            // Function call as statement (DECY-066)
891            // e.g., printf("Hello"); or free(ptr);
892            if let Some(stmt) = extract_statement(cursor) {
893                statements.push(stmt);
894            }
895            CXChildVisit_Continue
896        }
897        _ => CXChildVisit_Recurse, // Recurse into unknown nodes to find statements
898    }
899}
900
901/// Extract a variable declaration statement.
902fn extract_var_decl(cursor: CXCursor) -> Option<Statement> {
903    // Get variable name
904    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
905    let name = unsafe {
906        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
907        let name = c_str.to_string_lossy().into_owned();
908        clang_disposeString(name_cxstring);
909        name
910    };
911
912    // Get variable type
913    let cx_type = unsafe { clang_getCursorType(cursor) };
914    let var_type = convert_type(cx_type)?;
915
916    // Extract initializer by visiting children
917    let mut initializer: Option<Expression> = None;
918    let init_ptr = &mut initializer as *mut Option<Expression>;
919
920    unsafe {
921        clang_visitChildren(cursor, visit_expression, init_ptr as CXClientData);
922    }
923
924    // DECY-112 FIX: For array types, the visit_expression callback may incorrectly
925    // capture the array size as an initializer. For example, `int nums[5];` has
926    // the integer literal 5 as a child node (the array size), which gets captured.
927    //
928    // Fix: If the variable is an array type and the initializer is an integer literal
929    // that matches the array size, clear the initializer (it's the size, not an init).
930    let initializer = match (&var_type, &initializer) {
931        (
932            Type::Array {
933                size: Some(array_size),
934                ..
935            },
936            Some(Expression::IntLiteral(init_val)),
937        ) if i64::from(*init_val) == *array_size => {
938            // The "initializer" is actually the array size expression, not a real initializer
939            None
940        }
941        _ => initializer,
942    };
943
944    Some(Statement::VariableDeclaration {
945        name,
946        var_type,
947        initializer,
948    })
949}
950
951/// Extract a return statement.
952fn extract_return_stmt(cursor: CXCursor) -> Option<Statement> {
953    // Extract return expression by visiting children
954    let mut return_expr: Option<Expression> = None;
955    let expr_ptr = &mut return_expr as *mut Option<Expression>;
956
957    unsafe {
958        clang_visitChildren(cursor, visit_expression, expr_ptr as CXClientData);
959    }
960
961    Some(Statement::Return(return_expr))
962}
963
964/// Extract an assignment statement.
965fn extract_assignment_stmt(cursor: CXCursor) -> Option<Statement> {
966    // Check if this binary operator is an assignment '=' (not '==', '!=', etc.)
967    // Get the translation unit
968    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
969    if tu.is_null() {
970        return None;
971    }
972
973    // Get the extent (source range) of the cursor
974    let extent = unsafe { clang_getCursorExtent(cursor) };
975
976    // Tokenize to find the operator
977    let mut tokens = ptr::null_mut();
978    let mut num_tokens = 0;
979
980    unsafe {
981        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
982    }
983
984    let mut is_assignment = false;
985
986    // Look through tokens to find '=' (and make sure it's not '==', '!=', etc.)
987    for i in 0..num_tokens {
988        unsafe {
989            let token = *tokens.add(i as usize);
990            let token_kind = clang_getTokenKind(token);
991
992            if token_kind == CXToken_Punctuation {
993                let token_cxstring = clang_getTokenSpelling(tu, token);
994                let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
995                if let Ok(token_str) = c_str.to_str() {
996                    // Only accept single '=' for assignment
997                    if token_str == "=" {
998                        is_assignment = true;
999                        clang_disposeString(token_cxstring);
1000                        break;
1001                    } else if token_str == "=="
1002                        || token_str == "!="
1003                        || token_str == "<="
1004                        || token_str == ">="
1005                    {
1006                        // This is a comparison operator, not assignment
1007                        clang_disposeString(token_cxstring);
1008                        break;
1009                    }
1010                }
1011                clang_disposeString(token_cxstring);
1012            }
1013        }
1014    }
1015
1016    unsafe {
1017        clang_disposeTokens(tu, tokens, num_tokens);
1018    }
1019
1020    if !is_assignment {
1021        return None;
1022    }
1023
1024    // Extract left side (target) and right side (value)
1025    let mut operands: Vec<Expression> = Vec::new();
1026    let operands_ptr = &mut operands as *mut Vec<Expression>;
1027
1028    unsafe {
1029        clang_visitChildren(cursor, visit_binary_operand, operands_ptr as CXClientData);
1030    }
1031
1032    // Assignment should have exactly 2 operands
1033    if operands.len() != 2 {
1034        return None;
1035    }
1036
1037    // Check if left side is a dereference (e.g., *dst = x)
1038    if let Expression::Dereference(inner) = &operands[0] {
1039        return Some(Statement::DerefAssignment {
1040            target: (**inner).clone(), // Extract the inner expression being dereferenced
1041            value: operands[1].clone(),
1042        });
1043    }
1044
1045    // Check if left side is an array index (e.g., arr[i] = value)
1046    if let Expression::ArrayIndex { array, index } = &operands[0] {
1047        return Some(Statement::ArrayIndexAssignment {
1048            array: array.clone(),
1049            index: index.clone(),
1050            value: operands[1].clone(),
1051        });
1052    }
1053
1054    // Check if left side is a field access (e.g., ptr->field = value or obj.field = value)
1055    if matches!(
1056        &operands[0],
1057        Expression::PointerFieldAccess { .. } | Expression::FieldAccess { .. }
1058    ) {
1059        // Extract field name from the expression
1060        let field = match &operands[0] {
1061            Expression::PointerFieldAccess { field, .. } => field.clone(),
1062            Expression::FieldAccess { field, .. } => field.clone(),
1063            _ => unreachable!(),
1064        };
1065
1066        // Extract object from the expression
1067        let object = match &operands[0] {
1068            Expression::PointerFieldAccess { pointer, .. } => (**pointer).clone(),
1069            Expression::FieldAccess { object, .. } => (**object).clone(),
1070            _ => unreachable!(),
1071        };
1072
1073        return Some(Statement::FieldAssignment {
1074            object,
1075            field,
1076            value: operands[1].clone(),
1077        });
1078    }
1079
1080    // Left side must be a variable reference for regular assignment
1081    let target = match &operands[0] {
1082        Expression::Variable(name) => name.clone(),
1083        _ => return None, // Can't assign to non-variables (yet)
1084    };
1085
1086    Some(Statement::Assignment {
1087        target,
1088        value: operands[1].clone(),
1089    })
1090}
1091
1092/// Extract an increment/decrement statement (++, --).
1093fn extract_inc_dec_stmt(cursor: CXCursor) -> Option<Statement> {
1094    // Get the translation unit
1095    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
1096    if tu.is_null() {
1097        return None;
1098    }
1099
1100    // Get the extent (source range) of the cursor
1101    let extent = unsafe { clang_getCursorExtent(cursor) };
1102
1103    // Tokenize to find the operator
1104    let mut tokens = ptr::null_mut();
1105    let mut num_tokens = 0;
1106
1107    unsafe {
1108        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
1109    }
1110
1111    let mut operator: Option<String> = None;
1112    let mut operator_position = 0;
1113
1114    // Look through tokens to find ++ or --
1115    for i in 0..num_tokens {
1116        unsafe {
1117            let token = *tokens.add(i as usize);
1118            let token_kind = clang_getTokenKind(token);
1119
1120            if token_kind == CXToken_Punctuation {
1121                let token_cxstring = clang_getTokenSpelling(tu, token);
1122                let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
1123                if let Ok(token_str) = c_str.to_str() {
1124                    if token_str == "++" || token_str == "--" {
1125                        operator = Some(token_str.to_string()); // Clone the string before disposing
1126                        operator_position = i;
1127                        clang_disposeString(token_cxstring);
1128                        break;
1129                    }
1130                }
1131                clang_disposeString(token_cxstring);
1132            }
1133        }
1134    }
1135
1136    // Determine if this is pre or post increment/decrement
1137    // If operator comes before identifier, it's pre (++ptr)
1138    // If operator comes after identifier, it's post (ptr++)
1139    let is_pre = operator_position == 0;
1140
1141    unsafe {
1142        clang_disposeTokens(tu, tokens, num_tokens);
1143    }
1144
1145    // DECY-166: First check if this is a member expression increment (e.g., sb->length++)
1146    // We need to detect this case and create a FieldAssignment instead of PostIncrement
1147    let mut member_expr: Option<Expression> = None;
1148    let mut simple_var: Option<String> = None;
1149
1150    // Visit children to find either a MemberRefExpr or DeclRefExpr
1151    extern "C" fn visit_for_inc_target(
1152        cursor: CXCursor,
1153        _parent: CXCursor,
1154        client_data: CXClientData,
1155    ) -> CXChildVisitResult {
1156        let data = unsafe { &mut *(client_data as *mut (Option<Expression>, Option<String>)) };
1157        let kind = unsafe { clang_getCursorKind(cursor) };
1158
1159        // Check for member expression first (sb->length, obj.field)
1160        if kind == CXCursor_MemberRefExpr {
1161            if let Some(expr) = extract_field_access(cursor) {
1162                data.0 = Some(expr);
1163                return CXChildVisit_Break;
1164            }
1165        }
1166
1167        // Fall back to simple variable reference
1168        if kind == CXCursor_DeclRefExpr {
1169            let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
1170            let name = unsafe {
1171                let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
1172                let var_name = c_str.to_string_lossy().into_owned();
1173                clang_disposeString(name_cxstring);
1174                var_name
1175            };
1176            data.1 = Some(name);
1177            CXChildVisit_Break
1178        } else {
1179            CXChildVisit_Recurse
1180        }
1181    }
1182
1183    let mut target_data = (member_expr, simple_var);
1184    let target_ptr = &mut target_data as *mut (Option<Expression>, Option<String>);
1185    unsafe {
1186        clang_visitChildren(cursor, visit_for_inc_target, target_ptr as CXClientData);
1187    }
1188    member_expr = target_data.0;
1189    simple_var = target_data.1;
1190
1191    let operator = operator?;
1192    let op_str = operator.as_str();
1193
1194    // DECY-166: If we found a member expression, create a FieldAssignment
1195    // sb->length++ becomes FieldAssignment { object: sb, field: "length", value: sb->length + 1 }
1196    if let Some(expr) = member_expr {
1197        // Determine the delta (+1 or -1) based on operator
1198        let delta = match op_str {
1199            "++" => 1,
1200            "--" => -1,
1201            _ => return None,
1202        };
1203
1204        // Extract object and field from the expression
1205        match expr {
1206            Expression::PointerFieldAccess { pointer, field } => {
1207                // Create the increment/decrement value expression
1208                let value = if delta > 0 {
1209                    Expression::BinaryOp {
1210                        left: Box::new(Expression::PointerFieldAccess {
1211                            pointer: pointer.clone(),
1212                            field: field.clone(),
1213                        }),
1214                        op: BinaryOperator::Add,
1215                        right: Box::new(Expression::IntLiteral(1)),
1216                    }
1217                } else {
1218                    Expression::BinaryOp {
1219                        left: Box::new(Expression::PointerFieldAccess {
1220                            pointer: pointer.clone(),
1221                            field: field.clone(),
1222                        }),
1223                        op: BinaryOperator::Subtract,
1224                        right: Box::new(Expression::IntLiteral(1)),
1225                    }
1226                };
1227
1228                return Some(Statement::FieldAssignment {
1229                    object: *pointer,
1230                    field,
1231                    value,
1232                });
1233            }
1234            Expression::FieldAccess { object, field } => {
1235                // Create the increment/decrement value expression
1236                let value = if delta > 0 {
1237                    Expression::BinaryOp {
1238                        left: Box::new(Expression::FieldAccess {
1239                            object: object.clone(),
1240                            field: field.clone(),
1241                        }),
1242                        op: BinaryOperator::Add,
1243                        right: Box::new(Expression::IntLiteral(1)),
1244                    }
1245                } else {
1246                    Expression::BinaryOp {
1247                        left: Box::new(Expression::FieldAccess {
1248                            object: object.clone(),
1249                            field: field.clone(),
1250                        }),
1251                        op: BinaryOperator::Subtract,
1252                        right: Box::new(Expression::IntLiteral(1)),
1253                    }
1254                };
1255
1256                return Some(Statement::FieldAssignment {
1257                    object: *object,
1258                    field,
1259                    value,
1260                });
1261            }
1262            _ => {} // Fall through to simple variable handling
1263        }
1264    }
1265
1266    // Simple variable increment/decrement
1267    let target = simple_var?;
1268
1269    match op_str {
1270        "++" => {
1271            if is_pre {
1272                Some(Statement::PreIncrement { target })
1273            } else {
1274                Some(Statement::PostIncrement { target })
1275            }
1276        }
1277        "--" => {
1278            if is_pre {
1279                Some(Statement::PreDecrement { target })
1280            } else {
1281                Some(Statement::PostDecrement { target })
1282            }
1283        }
1284        _ => None,
1285    }
1286}
1287
1288/// Extract a compound assignment statement (+=, -=, *=, /=, %=).
1289fn extract_compound_assignment_stmt(cursor: CXCursor) -> Option<Statement> {
1290    // Get the translation unit
1291    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
1292    if tu.is_null() {
1293        return None;
1294    }
1295
1296    // Get the extent (source range) of the cursor
1297    let extent = unsafe { clang_getCursorExtent(cursor) };
1298
1299    // Tokenize to find the operator
1300    let mut tokens = ptr::null_mut();
1301    let mut num_tokens = 0;
1302
1303    unsafe {
1304        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
1305    }
1306
1307    let mut operator: Option<BinaryOperator> = None;
1308
1309    // Look through tokens to find compound assignment operator
1310    for i in 0..num_tokens {
1311        unsafe {
1312            let token = *tokens.add(i as usize);
1313            let token_kind = clang_getTokenKind(token);
1314
1315            if token_kind == CXToken_Punctuation {
1316                let token_cxstring = clang_getTokenSpelling(tu, token);
1317                let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
1318                if let Ok(token_str) = c_str.to_str() {
1319                    operator = match token_str {
1320                        "+=" => Some(BinaryOperator::Add),
1321                        "-=" => Some(BinaryOperator::Subtract),
1322                        "*=" => Some(BinaryOperator::Multiply),
1323                        "/=" => Some(BinaryOperator::Divide),
1324                        "%=" => Some(BinaryOperator::Modulo),
1325                        _ => None,
1326                    };
1327                    if operator.is_some() {
1328                        clang_disposeString(token_cxstring);
1329                        break;
1330                    }
1331                }
1332                clang_disposeString(token_cxstring);
1333            }
1334        }
1335    }
1336
1337    unsafe {
1338        clang_disposeTokens(tu, tokens, num_tokens);
1339    }
1340
1341    let op = operator?;
1342
1343    // Extract left side (target) and right side (value)
1344    let mut operands: Vec<Expression> = Vec::new();
1345    let operands_ptr = &mut operands as *mut Vec<Expression>;
1346
1347    unsafe {
1348        clang_visitChildren(cursor, visit_binary_operand, operands_ptr as CXClientData);
1349    }
1350
1351    // Compound assignment should have exactly 2 operands
1352    if operands.len() != 2 {
1353        return None;
1354    }
1355
1356    // DECY-185: Check for complex targets first (Dereference, PointerFieldAccess, FieldAccess)
1357    // These need DerefCompoundAssignment since target is an Expression, not just a String
1358
1359    // Check if left side is a dereference (e.g., *ptr *= 2)
1360    if let Expression::Dereference(inner) = &operands[0] {
1361        return Some(Statement::DerefCompoundAssignment {
1362            target: (**inner).clone(), // The thing being dereferenced (e.g., 'ptr')
1363            op,
1364            value: operands[1].clone(),
1365        });
1366    }
1367
1368    // Check if left side is a pointer field access (e.g., sb->capacity *= 2)
1369    if let Expression::PointerFieldAccess { .. } = &operands[0] {
1370        return Some(Statement::DerefCompoundAssignment {
1371            target: operands[0].clone(), // The full PointerFieldAccess expression
1372            op,
1373            value: operands[1].clone(),
1374        });
1375    }
1376
1377    // Check if left side is a struct field access (e.g., obj.field *= 2)
1378    if let Expression::FieldAccess { .. } = &operands[0] {
1379        return Some(Statement::DerefCompoundAssignment {
1380            target: operands[0].clone(), // The full FieldAccess expression
1381            op,
1382            value: operands[1].clone(),
1383        });
1384    }
1385
1386    // Check if left side is an array index (e.g., arr[i] *= 2)
1387    if let Expression::ArrayIndex { .. } = &operands[0] {
1388        return Some(Statement::DerefCompoundAssignment {
1389            target: operands[0].clone(), // The full ArrayIndex expression
1390            op,
1391            value: operands[1].clone(),
1392        });
1393    }
1394
1395    // Simple variable target (existing behavior)
1396    let target = match &operands[0] {
1397        Expression::Variable(name) => name.clone(),
1398        _ => return None, // Unknown target type
1399    };
1400
1401    Some(Statement::CompoundAssignment {
1402        target,
1403        op,
1404        value: operands[1].clone(),
1405    })
1406}
1407
1408/// Extract an if statement.
1409fn extract_if_stmt(cursor: CXCursor) -> Option<Statement> {
1410    // An if statement has 2 or 3 children:
1411    // 1. Condition expression
1412    // 2. Then block (compound statement)
1413    // 3. Else block (optional compound statement)
1414
1415    #[repr(C)]
1416    struct IfData {
1417        condition: Option<Expression>,
1418        then_block: Vec<Statement>,
1419        else_block: Option<Vec<Statement>>,
1420        child_index: u32,
1421    }
1422
1423    let mut if_data = IfData {
1424        condition: None,
1425        then_block: Vec::new(),
1426        else_block: None,
1427        child_index: 0,
1428    };
1429
1430    let data_ptr = &mut if_data as *mut IfData;
1431
1432    unsafe {
1433        clang_visitChildren(cursor, visit_if_children, data_ptr as CXClientData);
1434    }
1435
1436    Some(Statement::If {
1437        condition: if_data.condition?,
1438        then_block: if_data.then_block,
1439        else_block: if_data.else_block,
1440    })
1441}
1442
1443/// Visitor for if statement children.
1444#[allow(non_upper_case_globals)]
1445extern "C" fn visit_if_children(
1446    cursor: CXCursor,
1447    _parent: CXCursor,
1448    client_data: CXClientData,
1449) -> CXChildVisitResult {
1450    #[repr(C)]
1451    struct IfData {
1452        condition: Option<Expression>,
1453        then_block: Vec<Statement>,
1454        else_block: Option<Vec<Statement>>,
1455        child_index: u32,
1456    }
1457
1458    let if_data = unsafe { &mut *(client_data as *mut IfData) };
1459    let kind = unsafe { clang_getCursorKind(cursor) };
1460
1461    match if_data.child_index {
1462        0 => {
1463            // First child: condition expression
1464            // The cursor itself IS the condition, extract it directly
1465            if_data.condition = match kind {
1466                CXCursor_BinaryOperator => extract_binary_op(cursor),
1467                CXCursor_IntegerLiteral => extract_int_literal(cursor),
1468                107 => extract_float_literal(cursor), // CXCursor_FloatingLiteral
1469                110 => extract_char_literal(cursor), // CXCursor_CharacterLiteral
1470                CXCursor_DeclRefExpr => extract_variable_ref(cursor),
1471                CXCursor_CallExpr => extract_function_call(cursor),
1472                CXCursor_UnaryOperator => extract_unary_op(cursor),
1473                _ => {
1474                    // For other expression types, try visiting children
1475                    let mut cond_expr: Option<Expression> = None;
1476                    let expr_ptr = &mut cond_expr as *mut Option<Expression>;
1477                    unsafe {
1478                        clang_visitChildren(cursor, visit_expression, expr_ptr as CXClientData);
1479                    }
1480                    cond_expr
1481                }
1482            };
1483            if_data.child_index += 1;
1484            CXChildVisit_Continue
1485        }
1486        1 => {
1487            // Second child: then block
1488            // DECY-216: Handle both compound statements (with braces) and single statements
1489            if kind == CXCursor_CompoundStmt {
1490                let body_ptr = &mut if_data.then_block as *mut Vec<Statement>;
1491                unsafe {
1492                    clang_visitChildren(cursor, visit_statement, body_ptr as CXClientData);
1493                }
1494            } else {
1495                // Single statement without braces: if (cond) return 1;
1496                if let Some(stmt) = extract_single_statement(cursor) {
1497                    if_data.then_block.push(stmt);
1498                }
1499            }
1500            if_data.child_index += 1;
1501            CXChildVisit_Continue
1502        }
1503        2 => {
1504            // Third child (optional): else block
1505            // DECY-216: Handle compound, if-else chain, and single statement
1506            if kind == CXCursor_CompoundStmt {
1507                let mut else_stmts = Vec::new();
1508                let body_ptr = &mut else_stmts as *mut Vec<Statement>;
1509                unsafe {
1510                    clang_visitChildren(cursor, visit_statement, body_ptr as CXClientData);
1511                }
1512                if_data.else_block = Some(else_stmts);
1513            } else if kind == CXCursor_IfStmt {
1514                // else if chain
1515                let mut else_stmts = Vec::new();
1516                let body_ptr = &mut else_stmts as *mut Vec<Statement>;
1517                unsafe {
1518                    clang_visitChildren(cursor, visit_statement, body_ptr as CXClientData);
1519                }
1520                if_data.else_block = Some(else_stmts);
1521            } else {
1522                // Single statement in else: else return 0;
1523                if let Some(stmt) = extract_single_statement(cursor) {
1524                    if_data.else_block = Some(vec![stmt]);
1525                }
1526            }
1527            if_data.child_index += 1;
1528            CXChildVisit_Continue
1529        }
1530        _ => CXChildVisit_Continue,
1531    }
1532}
1533
1534/// Extract a for loop statement.
1535fn extract_for_stmt(cursor: CXCursor) -> Option<Statement> {
1536    // DECY-200: Two-pass approach to handle for loops with empty parts
1537    // Clang skips empty parts entirely, so we can't rely on fixed indices
1538    //
1539    // Pass 1: Collect all children with their cursor kinds
1540    // Pass 2: Identify what each child represents based on type and position
1541
1542    #[repr(C)]
1543    struct ForChildInfo {
1544        cursor: CXCursor,
1545        kind: i32,
1546    }
1547
1548    #[repr(C)]
1549    struct ForCollector {
1550        children: Vec<ForChildInfo>,
1551    }
1552
1553    // First pass: collect all children
1554    extern "C" fn collect_for_children(
1555        cursor: CXCursor,
1556        _parent: CXCursor,
1557        client_data: CXClientData,
1558    ) -> CXChildVisitResult {
1559        let collector = unsafe { &mut *(client_data as *mut ForCollector) };
1560        let kind = unsafe { clang_getCursorKind(cursor) };
1561        collector.children.push(ForChildInfo { cursor, kind });
1562        CXChildVisit_Continue
1563    }
1564
1565    let mut collector = ForCollector {
1566        children: Vec::new(),
1567    };
1568
1569    unsafe {
1570        clang_visitChildren(cursor, collect_for_children, &mut collector as *mut _ as CXClientData);
1571    }
1572
1573    // Second pass: identify what each child is
1574    let mut init: Option<Box<Statement>> = None;
1575    let mut condition: Option<Expression> = None;
1576    let mut increment: Option<Box<Statement>> = None;
1577    let mut body: Vec<Statement> = Vec::new();
1578
1579    let num_children = collector.children.len();
1580
1581    // Body is always the LAST child
1582    // The children before body are init/condition/increment in that order,
1583    // but clang omits empty ones
1584
1585    // Helper to check if a BinaryOperator is an assignment
1586    fn is_assignment_op(cursor: CXCursor) -> bool {
1587        if let Some(op) = extract_binary_operator(cursor) {
1588            matches!(op, BinaryOperator::Assign)
1589        } else {
1590            false
1591        }
1592    }
1593
1594    // Helper to check if a BinaryOperator is a comparison/logical (condition)
1595    fn is_condition_op(cursor: CXCursor) -> bool {
1596        if let Some(op) = extract_binary_operator(cursor) {
1597            matches!(
1598                op,
1599                BinaryOperator::Equal |
1600                BinaryOperator::NotEqual |
1601                BinaryOperator::LessThan |
1602                BinaryOperator::GreaterThan |
1603                BinaryOperator::LessEqual |
1604                BinaryOperator::GreaterEqual |
1605                BinaryOperator::LogicalAnd |
1606                BinaryOperator::LogicalOr
1607            )
1608        } else {
1609            false
1610        }
1611    }
1612
1613    if num_children == 0 {
1614        return Some(Statement::For { init, condition, increment, body });
1615    }
1616
1617    // Process children based on count and types
1618    // The LAST child is always the body
1619    let body_idx = num_children - 1;
1620    let body_child = &collector.children[body_idx];
1621
1622    // Extract body
1623    if body_child.kind == CXCursor_CompoundStmt {
1624        let body_ptr = &mut body as *mut Vec<Statement>;
1625        unsafe {
1626            clang_visitChildren(body_child.cursor, visit_statement, body_ptr as CXClientData);
1627        }
1628    } else {
1629        // Single statement body - extract it
1630        if let Some(stmt) = extract_single_statement(body_child.cursor) {
1631            body.push(stmt);
1632        }
1633    }
1634
1635    // Process children before body
1636    let pre_body = &collector.children[..body_idx];
1637
1638    match pre_body.len() {
1639        0 => {
1640            // for (;;) - infinite loop with no init/condition/increment
1641        }
1642        1 => {
1643            // One child before body - could be init, condition, or increment
1644            // Use heuristics to determine which
1645            let child = &pre_body[0];
1646            if child.kind == CXCursor_DeclStmt { // DeclStmt - always init
1647                let mut init_stmts = Vec::new();
1648                let ptr = &mut init_stmts as *mut Vec<Statement>;
1649                unsafe {
1650                    clang_visitChildren(child.cursor, visit_statement, ptr as CXClientData);
1651                }
1652                if let Some(stmt) = init_stmts.into_iter().next() {
1653                    init = Some(Box::new(stmt));
1654                }
1655            } else if child.kind == CXCursor_BinaryOperator {
1656                if is_assignment_op(child.cursor) {
1657                    // Assignment - treat as init
1658                    if let Some(stmt) = extract_assignment_stmt(child.cursor) {
1659                        init = Some(Box::new(stmt));
1660                    }
1661                } else if is_condition_op(child.cursor) {
1662                    // Comparison - treat as condition
1663                    condition = extract_binary_op(child.cursor);
1664                } else {
1665                    // Ambiguous - default to condition
1666                    condition = extract_binary_op(child.cursor);
1667                }
1668            } else if child.kind == CXCursor_UnaryOperator {
1669                if let Some(stmt) = extract_inc_dec_stmt(child.cursor) {
1670                    increment = Some(Box::new(stmt));
1671                }
1672            } else {
1673                // Treat as condition by default
1674                condition = extract_expression_from_cursor(child.cursor);
1675            }
1676        }
1677        2 => {
1678            // Two children before body
1679            // Most common case: condition and increment (init is empty)
1680            let child0 = &pre_body[0];
1681            let child1 = &pre_body[1];
1682
1683            // Check if first child is init (DeclStmt or assignment)
1684            let first_is_init = child0.kind == CXCursor_DeclStmt ||
1685                (child0.kind == CXCursor_BinaryOperator && is_assignment_op(child0.cursor));
1686
1687            if first_is_init {
1688                // child0 = init, child1 = condition (skip increment)
1689                if child0.kind == CXCursor_DeclStmt {
1690                    let mut init_stmts = Vec::new();
1691                    let ptr = &mut init_stmts as *mut Vec<Statement>;
1692                    unsafe {
1693                        clang_visitChildren(child0.cursor, visit_statement, ptr as CXClientData);
1694                    }
1695                    if let Some(stmt) = init_stmts.into_iter().next() {
1696                        init = Some(Box::new(stmt));
1697                    }
1698                } else if let Some(stmt) = extract_assignment_stmt(child0.cursor) {
1699                    init = Some(Box::new(stmt));
1700                }
1701                condition = extract_expression_from_cursor(child1.cursor);
1702            } else {
1703                // child0 = condition, child1 = increment (no init)
1704                condition = extract_expression_from_cursor(child0.cursor);
1705                if child1.kind == CXCursor_BinaryOperator {
1706                    if let Some(stmt) = extract_assignment_stmt(child1.cursor) {
1707                        increment = Some(Box::new(stmt));
1708                    }
1709                } else if child1.kind == CXCursor_UnaryOperator {
1710                    if let Some(stmt) = extract_inc_dec_stmt(child1.cursor) {
1711                        increment = Some(Box::new(stmt));
1712                    }
1713                }
1714            }
1715        }
1716        3 => {
1717            // Three children before body - init, condition, increment all present
1718            let child0 = &pre_body[0];
1719            let child1 = &pre_body[1];
1720            let child2 = &pre_body[2];
1721
1722            // Init
1723            if child0.kind == CXCursor_DeclStmt {
1724                let mut init_stmts = Vec::new();
1725                let ptr = &mut init_stmts as *mut Vec<Statement>;
1726                unsafe {
1727                    clang_visitChildren(child0.cursor, visit_statement, ptr as CXClientData);
1728                }
1729                if let Some(stmt) = init_stmts.into_iter().next() {
1730                    init = Some(Box::new(stmt));
1731                }
1732            } else if child0.kind == CXCursor_BinaryOperator {
1733                if let Some(stmt) = extract_assignment_stmt(child0.cursor) {
1734                    init = Some(Box::new(stmt));
1735                }
1736            }
1737
1738            // Condition
1739            condition = extract_expression_from_cursor(child1.cursor);
1740
1741            // Increment
1742            if child2.kind == CXCursor_BinaryOperator {
1743                if let Some(stmt) = extract_assignment_stmt(child2.cursor) {
1744                    increment = Some(Box::new(stmt));
1745                }
1746            } else if child2.kind == CXCursor_UnaryOperator {
1747                if let Some(stmt) = extract_inc_dec_stmt(child2.cursor) {
1748                    increment = Some(Box::new(stmt));
1749                }
1750            }
1751        }
1752        _ => {
1753            // More than 3 children before body - unexpected, handle gracefully
1754        }
1755    }
1756
1757    Some(Statement::For {
1758        init,
1759        condition,
1760        increment,
1761        body,
1762    })
1763}
1764
1765/// Extract expression from cursor for for-loop condition
1766fn extract_expression_from_cursor(cursor: CXCursor) -> Option<Expression> {
1767    let kind = unsafe { clang_getCursorKind(cursor) };
1768    match kind {
1769        CXCursor_BinaryOperator => extract_binary_op(cursor),
1770        CXCursor_IntegerLiteral => extract_int_literal(cursor),
1771        107 => extract_float_literal(cursor), // CXCursor_FloatingLiteral
1772        110 => extract_char_literal(cursor), // CXCursor_CharacterLiteral
1773        CXCursor_DeclRefExpr => extract_variable_ref(cursor),
1774        CXCursor_CallExpr => extract_function_call(cursor),
1775        CXCursor_UnaryOperator => extract_unary_op(cursor),
1776        _ => {
1777            let mut expr: Option<Expression> = None;
1778            let expr_ptr = &mut expr as *mut Option<Expression>;
1779            unsafe {
1780                clang_visitChildren(cursor, visit_expression, expr_ptr as CXClientData);
1781            }
1782            expr
1783        }
1784    }
1785}
1786
1787/// Extract a single statement from a cursor (for non-compound for bodies)
1788fn extract_single_statement(cursor: CXCursor) -> Option<Statement> {
1789    let kind = unsafe { clang_getCursorKind(cursor) };
1790    match kind {
1791        CXCursor_IfStmt => extract_if_stmt(cursor),
1792        CXCursor_ForStmt => extract_for_stmt(cursor),
1793        CXCursor_WhileStmt => extract_while_stmt(cursor),
1794        CXCursor_ReturnStmt => extract_return_stmt(cursor),
1795        CXCursor_SwitchStmt => extract_switch_stmt(cursor),
1796        CXCursor_UnaryOperator => extract_inc_dec_stmt(cursor),
1797        CXCursor_BinaryOperator => extract_assignment_stmt(cursor),
1798        CXCursor_CallExpr => {
1799            if let Some(Expression::FunctionCall { function, arguments }) = extract_function_call(cursor) {
1800                Some(Statement::FunctionCall { function, arguments })
1801            } else {
1802                None
1803            }
1804        }
1805        CXCursor_BreakStmt => Some(Statement::Break),
1806        CXCursor_ContinueStmt => Some(Statement::Continue),
1807        CXCursor_DoStmt | CXCursor_NullStmt => None, // Not supported yet
1808        _ => None,
1809    }
1810}
1811/// Extract a while loop statement.
1812fn extract_while_stmt(cursor: CXCursor) -> Option<Statement> {
1813    // A while loop has 2 children:
1814    // 1. Condition expression
1815    // 2. Body (compound statement)
1816
1817    #[repr(C)]
1818    struct WhileData {
1819        condition: Option<Expression>,
1820        body: Vec<Statement>,
1821        child_index: u32,
1822    }
1823
1824    let mut while_data = WhileData {
1825        condition: None,
1826        body: Vec::new(),
1827        child_index: 0,
1828    };
1829
1830    let data_ptr = &mut while_data as *mut WhileData;
1831
1832    unsafe {
1833        clang_visitChildren(cursor, visit_while_children, data_ptr as CXClientData);
1834    }
1835
1836    Some(Statement::While {
1837        condition: while_data.condition?,
1838        body: while_data.body,
1839    })
1840}
1841
1842/// Visitor for while loop children.
1843#[allow(non_upper_case_globals)]
1844extern "C" fn visit_while_children(
1845    cursor: CXCursor,
1846    _parent: CXCursor,
1847    client_data: CXClientData,
1848) -> CXChildVisitResult {
1849    #[repr(C)]
1850    struct WhileData {
1851        condition: Option<Expression>,
1852        body: Vec<Statement>,
1853        child_index: u32,
1854    }
1855
1856    let while_data = unsafe { &mut *(client_data as *mut WhileData) };
1857    let kind = unsafe { clang_getCursorKind(cursor) };
1858
1859    match while_data.child_index {
1860        0 => {
1861            // First child: condition expression
1862            // The cursor itself IS the condition, extract it directly
1863            while_data.condition = match kind {
1864                CXCursor_BinaryOperator => extract_binary_op(cursor),
1865                CXCursor_IntegerLiteral => extract_int_literal(cursor),
1866                107 => extract_float_literal(cursor), // CXCursor_FloatingLiteral
1867                110 => extract_char_literal(cursor), // CXCursor_CharacterLiteral
1868                CXCursor_DeclRefExpr => extract_variable_ref(cursor),
1869                CXCursor_CallExpr => extract_function_call(cursor),
1870                CXCursor_UnaryOperator => extract_unary_op(cursor),
1871                _ => {
1872                    let mut cond_expr: Option<Expression> = None;
1873                    let expr_ptr = &mut cond_expr as *mut Option<Expression>;
1874                    unsafe {
1875                        clang_visitChildren(cursor, visit_expression, expr_ptr as CXClientData);
1876                    }
1877                    cond_expr
1878                }
1879            };
1880            while_data.child_index += 1;
1881            CXChildVisit_Continue
1882        }
1883        1 => {
1884            // Second child: body
1885            if kind == CXCursor_CompoundStmt {
1886                let body_ptr = &mut while_data.body as *mut Vec<Statement>;
1887                unsafe {
1888                    clang_visitChildren(cursor, visit_statement, body_ptr as CXClientData);
1889                }
1890            }
1891            while_data.child_index += 1;
1892            CXChildVisit_Continue
1893        }
1894        _ => CXChildVisit_Continue,
1895    }
1896}
1897
1898/// Extract a switch statement from a cursor.
1899///
1900/// Parses switch statements including cases and default labels.
1901#[allow(non_upper_case_globals)]
1902fn extract_switch_stmt(cursor: CXCursor) -> Option<Statement> {
1903    // Switch has 2 children:
1904    // 1. Condition expression
1905    // 2. Body (compound statement containing case/default labels)
1906
1907    #[repr(C)]
1908    struct SwitchData {
1909        condition: Option<Expression>,
1910        cases: Vec<SwitchCase>,
1911        default_case: Option<Vec<Statement>>,
1912        child_index: u32,
1913    }
1914
1915    let mut switch_data = SwitchData {
1916        condition: None,
1917        cases: Vec::new(),
1918        default_case: None,
1919        child_index: 0,
1920    };
1921
1922    let data_ptr = &mut switch_data as *mut SwitchData;
1923
1924    unsafe {
1925        clang_visitChildren(cursor, visit_switch_children, data_ptr as CXClientData);
1926    }
1927
1928    Some(Statement::Switch {
1929        condition: switch_data.condition?,
1930        cases: switch_data.cases,
1931        default_case: switch_data.default_case,
1932    })
1933}
1934
1935/// Visitor callback for switch statement children (condition and body).
1936#[allow(non_upper_case_globals)]
1937extern "C" fn visit_switch_children(
1938    cursor: CXCursor,
1939    _parent: CXCursor,
1940    client_data: CXClientData,
1941) -> CXChildVisitResult {
1942    #[repr(C)]
1943    struct SwitchData {
1944        condition: Option<Expression>,
1945        cases: Vec<SwitchCase>,
1946        default_case: Option<Vec<Statement>>,
1947        child_index: u32,
1948    }
1949
1950    let switch_data = unsafe { &mut *(client_data as *mut SwitchData) };
1951    let kind = unsafe { clang_getCursorKind(cursor) };
1952
1953    match switch_data.child_index {
1954        0 => {
1955            // First child: condition expression
1956            if let Some(expr) = try_extract_expression(cursor) {
1957                switch_data.condition = Some(expr);
1958            }
1959            switch_data.child_index += 1;
1960            CXChildVisit_Continue
1961        }
1962        1 => {
1963            // Second child: compound statement body containing cases
1964            // Need to visit this recursively to find case/default labels
1965            if kind == CXCursor_CompoundStmt {
1966                unsafe {
1967                    clang_visitChildren(cursor, visit_switch_body, client_data);
1968                }
1969            }
1970            switch_data.child_index += 1;
1971            CXChildVisit_Continue
1972        }
1973        _ => CXChildVisit_Continue,
1974    }
1975}
1976
1977/// Visitor callback for switch body to extract cases and default.
1978#[allow(non_upper_case_globals)]
1979extern "C" fn visit_switch_body(
1980    cursor: CXCursor,
1981    _parent: CXCursor,
1982    client_data: CXClientData,
1983) -> CXChildVisitResult {
1984    #[repr(C)]
1985    struct SwitchData {
1986        condition: Option<Expression>,
1987        cases: Vec<SwitchCase>,
1988        default_case: Option<Vec<Statement>>,
1989        child_index: u32,
1990    }
1991
1992    let switch_data = unsafe { &mut *(client_data as *mut SwitchData) };
1993    let kind = unsafe { clang_getCursorKind(cursor) };
1994
1995    match kind {
1996        CXCursor_CaseStmt => {
1997            // Extract case statement
1998            if let Some(case) = extract_case_stmt(cursor) {
1999                switch_data.cases.push(case);
2000            }
2001            CXChildVisit_Continue
2002        }
2003        CXCursor_DefaultStmt => {
2004            // Extract default statement
2005            if let Some(body) = extract_default_stmt(cursor) {
2006                switch_data.default_case = Some(body);
2007            }
2008            CXChildVisit_Continue
2009        }
2010        _ => CXChildVisit_Continue,
2011    }
2012}
2013
2014/// Extract a case statement from a cursor.
2015fn extract_case_stmt(cursor: CXCursor) -> Option<SwitchCase> {
2016    // Case statement has 2 children:
2017    // 1. Case value expression
2018    // 2. Body (statements following the case label)
2019
2020    #[repr(C)]
2021    struct CaseData {
2022        value: Option<Expression>,
2023        body: Vec<Statement>,
2024        child_index: u32,
2025    }
2026
2027    let mut case_data = CaseData {
2028        value: None,
2029        body: Vec::new(),
2030        child_index: 0,
2031    };
2032
2033    let data_ptr = &mut case_data as *mut CaseData;
2034
2035    unsafe {
2036        clang_visitChildren(cursor, visit_case_children, data_ptr as CXClientData);
2037    }
2038
2039    Some(SwitchCase {
2040        value: case_data.value,
2041        body: case_data.body,
2042    })
2043}
2044
2045/// Visitor for case statement children.
2046#[allow(non_upper_case_globals)]
2047extern "C" fn visit_case_children(
2048    cursor: CXCursor,
2049    _parent: CXCursor,
2050    client_data: CXClientData,
2051) -> CXChildVisitResult {
2052    #[repr(C)]
2053    struct CaseData {
2054        value: Option<Expression>,
2055        body: Vec<Statement>,
2056        child_index: u32,
2057    }
2058
2059    let case_data = unsafe { &mut *(client_data as *mut CaseData) };
2060    let _kind = unsafe { clang_getCursorKind(cursor) };
2061
2062    match case_data.child_index {
2063        0 => {
2064            // First child: case value expression
2065            if let Some(expr) = try_extract_expression(cursor) {
2066                case_data.value = Some(expr);
2067            }
2068            case_data.child_index += 1;
2069            CXChildVisit_Continue
2070        }
2071        _ => {
2072            // Subsequent children: statements in case body
2073            // Extract statements until we hit another case or default
2074            if let Some(stmt) = extract_statement(cursor) {
2075                case_data.body.push(stmt);
2076            }
2077            // Continue recursing to find all statements in the case body
2078            CXChildVisit_Recurse
2079        }
2080    }
2081}
2082
2083/// Extract a default statement from a cursor.
2084fn extract_default_stmt(cursor: CXCursor) -> Option<Vec<Statement>> {
2085    // Default statement has body statements as children
2086    let mut body: Vec<Statement> = Vec::new();
2087    let body_ptr = &mut body as *mut Vec<Statement>;
2088
2089    unsafe {
2090        clang_visitChildren(cursor, visit_default_children, body_ptr as CXClientData);
2091    }
2092
2093    Some(body)
2094}
2095
2096/// Visitor for default statement children.
2097#[allow(non_upper_case_globals)]
2098extern "C" fn visit_default_children(
2099    cursor: CXCursor,
2100    _parent: CXCursor,
2101    client_data: CXClientData,
2102) -> CXChildVisitResult {
2103    let body = unsafe { &mut *(client_data as *mut Vec<Statement>) };
2104
2105    // Extract all statements in default body
2106    if let Some(stmt) = extract_statement(cursor) {
2107        body.push(stmt);
2108    }
2109
2110    CXChildVisit_Continue
2111}
2112
2113/// Helper function to extract a statement from a cursor based on its kind.
2114#[allow(non_upper_case_globals)]
2115fn extract_statement(cursor: CXCursor) -> Option<Statement> {
2116    let kind = unsafe { clang_getCursorKind(cursor) };
2117
2118    match kind {
2119        CXCursor_ReturnStmt => extract_return_stmt(cursor),
2120        CXCursor_VarDecl => extract_var_decl(cursor),
2121        CXCursor_IfStmt => extract_if_stmt(cursor),
2122        CXCursor_ForStmt => extract_for_stmt(cursor),
2123        CXCursor_WhileStmt => extract_while_stmt(cursor),
2124        CXCursor_BreakStmt => Some(Statement::Break),
2125        CXCursor_ContinueStmt => Some(Statement::Continue),
2126        CXCursor_UnaryOperator => extract_inc_dec_stmt(cursor),
2127        CXCursor_BinaryOperator => extract_assignment_stmt(cursor),
2128        CXCursor_CallExpr => {
2129            // Function call as statement
2130            if let Some(Expression::FunctionCall {
2131                function,
2132                arguments,
2133            }) = extract_function_call(cursor)
2134            {
2135                return Some(Statement::FunctionCall {
2136                    function,
2137                    arguments,
2138                });
2139            }
2140            None
2141        }
2142        _ => None,
2143    }
2144}
2145
2146/// Visitor callback for extracting expressions.
2147///
2148/// # Safety
2149///
2150/// This function is called by clang_visitChildren and must follow C calling conventions.
2151#[allow(non_upper_case_globals)]
2152extern "C" fn visit_expression(
2153    cursor: CXCursor,
2154    _parent: CXCursor,
2155    client_data: CXClientData,
2156) -> CXChildVisitResult {
2157    // SAFETY: Converting client data back to expression option pointer
2158    let expr_opt = unsafe { &mut *(client_data as *mut Option<Expression>) };
2159
2160    // SAFETY: Getting cursor kind
2161    let kind = unsafe { clang_getCursorKind(cursor) };
2162
2163    match kind {
2164        CXCursor_IntegerLiteral => {
2165            // Integer literal
2166            if let Some(expr) = extract_int_literal(cursor) {
2167                *expr_opt = Some(expr);
2168            }
2169            CXChildVisit_Continue
2170        }
2171        107 => {
2172            // Floating-point literal (CXCursor_FloatingLiteral)
2173            if let Some(expr) = extract_float_literal(cursor) {
2174                *expr_opt = Some(expr);
2175            }
2176            CXChildVisit_Continue
2177        }
2178        CXCursor_StringLiteral => {
2179            // String literal
2180            if let Some(expr) = extract_string_literal(cursor) {
2181                *expr_opt = Some(expr);
2182            }
2183            CXChildVisit_Continue
2184        }
2185        110 => {
2186            // Character literal (CXCursor_CharacterLiteral)
2187            if let Some(expr) = extract_char_literal(cursor) {
2188                *expr_opt = Some(expr);
2189            }
2190            CXChildVisit_Continue
2191        }
2192        CXCursor_DeclRefExpr => {
2193            // Variable reference (e.g., "a" or "b" in "a + b")
2194            if let Some(expr) = extract_variable_ref(cursor) {
2195                *expr_opt = Some(expr);
2196            }
2197            CXChildVisit_Continue
2198        }
2199        CXCursor_BinaryOperator => {
2200            // Binary operation (e.g., a + b)
2201            if let Some(expr) = extract_binary_op(cursor) {
2202                *expr_opt = Some(expr);
2203            }
2204            CXChildVisit_Continue
2205        }
2206        CXCursor_CallExpr => {
2207            // Function call
2208            if let Some(expr) = extract_function_call(cursor) {
2209                *expr_opt = Some(expr);
2210            }
2211            CXChildVisit_Continue
2212        }
2213        CXCursor_UnaryOperator => {
2214            // Unary operator (e.g., *ptr dereference)
2215            if let Some(expr) = extract_unary_op(cursor) {
2216                *expr_opt = Some(expr);
2217            }
2218            CXChildVisit_Continue
2219        }
2220        CXCursor_ArraySubscriptExpr => {
2221            // Array indexing (e.g., arr[i])
2222            if let Some(expr) = extract_array_index(cursor) {
2223                *expr_opt = Some(expr);
2224            }
2225            CXChildVisit_Continue
2226        }
2227        CXCursor_MemberRefExpr => {
2228            // Field access (e.g., ptr->field or obj.field)
2229            if let Some(expr) = extract_field_access(cursor) {
2230                *expr_opt = Some(expr);
2231            }
2232            CXChildVisit_Continue
2233        }
2234        116 => {
2235            // CXCursor_ConditionalOperator (ternary)
2236            // DECY-192: Ternary expressions like (a > b) ? a : b
2237            if let Some(expr) = extract_conditional_op(cursor) {
2238                *expr_opt = Some(expr);
2239            }
2240            CXChildVisit_Continue
2241        }
2242        117 => {
2243            // CXCursor_CStyleCastExpr - cast expression like (int)x or (long)&ptr
2244            // DECY-208: Extract cast expressions to preserve type conversions
2245            if let Some(expr) = extract_cast(cursor) {
2246                *expr_opt = Some(expr);
2247            }
2248            CXChildVisit_Continue
2249        }
2250        CXCursor_UnexposedExpr => {
2251            // Unexposed expressions might wrap other expressions (like ImplicitCastExpr wrapping CallExpr)
2252            // Recurse first to check if there's a more specific expression inside
2253            CXChildVisit_Recurse
2254        }
2255        CXCursor_ParenExpr => {
2256            // Parenthesized expressions wrap other expressions, recurse
2257            CXChildVisit_Recurse
2258        }
2259        136 => {
2260            // CXCursor_UnaryExpr - could be sizeof or other unary expr
2261            if let Some(expr) = extract_sizeof(cursor) {
2262                *expr_opt = Some(expr);
2263                CXChildVisit_Continue
2264            } else {
2265                // Not sizeof, recurse for other unary expressions
2266                CXChildVisit_Recurse
2267            }
2268        }
2269        119 => {
2270            // CXCursor_InitListExpr - initializer list for struct/array
2271            // DECY-133: Handle designated initializers like {.x = 10, .y = 20}
2272            if let Some(expr) = extract_init_list(cursor) {
2273                *expr_opt = Some(expr);
2274            }
2275            CXChildVisit_Continue
2276        }
2277        _ => CXChildVisit_Recurse,
2278    }
2279}
2280
2281/// Extract an integer literal expression.
2282fn extract_int_literal(cursor: CXCursor) -> Option<Expression> {
2283    // SAFETY: Get the extent (source range) of the cursor
2284    let extent = unsafe { clang_getCursorExtent(cursor) };
2285
2286    // SAFETY: Get the translation unit from the cursor
2287    let tu = unsafe {
2288        let loc = clang_getCursorLocation(cursor);
2289        let mut file = ptr::null_mut();
2290        let mut line = 0;
2291        let mut column = 0;
2292        let mut offset = 0;
2293        clang_getFileLocation(loc, &mut file, &mut line, &mut column, &mut offset);
2294
2295        // Get the translation unit containing this cursor
2296        // We need to traverse up to get it, but for now use a different approach
2297        clang_Cursor_getTranslationUnit(cursor)
2298    };
2299
2300    if tu.is_null() {
2301        return Some(Expression::IntLiteral(0));
2302    }
2303
2304    // SAFETY: Tokenize the extent
2305    let mut tokens = ptr::null_mut();
2306    let mut num_tokens = 0;
2307
2308    unsafe {
2309        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
2310    }
2311
2312    let mut value = 0;
2313
2314    if num_tokens > 0 {
2315        // SAFETY: Get the spelling of the first token
2316        unsafe {
2317            let token_cxstring = clang_getTokenSpelling(tu, *tokens);
2318            let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
2319            if let Ok(token_str) = c_str.to_str() {
2320                value = token_str.parse().unwrap_or(0);
2321            }
2322            clang_disposeString(token_cxstring);
2323
2324            // SAFETY: Dispose tokens
2325            clang_disposeTokens(tu, tokens, num_tokens);
2326        }
2327    } else {
2328        // DECY-195: Fallback for system headers where tokenization fails
2329        // Use clang_Cursor_Evaluate to get the constant value
2330        unsafe {
2331            let eval_result = clang_Cursor_Evaluate(cursor);
2332            if !eval_result.is_null() {
2333                value = clang_EvalResult_getAsInt(eval_result);
2334                clang_EvalResult_dispose(eval_result);
2335            }
2336        }
2337    }
2338
2339    Some(Expression::IntLiteral(value))
2340}
2341
2342/// DECY-207: Extract a floating-point literal expression.
2343fn extract_float_literal(cursor: CXCursor) -> Option<Expression> {
2344    // SAFETY: Get the extent (source range) of the cursor
2345    let extent = unsafe { clang_getCursorExtent(cursor) };
2346
2347    // SAFETY: Get the translation unit from the cursor
2348    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
2349
2350    if tu.is_null() {
2351        return Some(Expression::FloatLiteral("0.0".to_string()));
2352    }
2353
2354    // SAFETY: Tokenize the extent
2355    let mut tokens = ptr::null_mut();
2356    let mut num_tokens = 0;
2357
2358    unsafe {
2359        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
2360    }
2361
2362    let mut value = "0.0".to_string();
2363
2364    if num_tokens > 0 {
2365        // SAFETY: Get the spelling of the first token
2366        unsafe {
2367            let token_cxstring = clang_getTokenSpelling(tu, *tokens);
2368            let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
2369            if let Ok(token_str) = c_str.to_str() {
2370                // Keep the string as-is (preserves precision)
2371                value = token_str.to_string();
2372            }
2373            clang_disposeString(token_cxstring);
2374
2375            // SAFETY: Dispose tokens
2376            clang_disposeTokens(tu, tokens, num_tokens);
2377        }
2378    } else {
2379        // Fallback using evaluate
2380        unsafe {
2381            let eval_result = clang_Cursor_Evaluate(cursor);
2382            if !eval_result.is_null() {
2383                let float_val = clang_EvalResult_getAsDouble(eval_result);
2384                value = format!("{}", float_val);
2385                clang_EvalResult_dispose(eval_result);
2386            }
2387        }
2388    }
2389
2390    Some(Expression::FloatLiteral(value))
2391}
2392
2393/// Extract a string literal expression.
2394fn extract_string_literal(cursor: CXCursor) -> Option<Expression> {
2395    // SAFETY: Get the extent (source range) of the cursor
2396    let extent = unsafe { clang_getCursorExtent(cursor) };
2397
2398    // SAFETY: Get the translation unit from the cursor
2399    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
2400
2401    if tu.is_null() {
2402        return Some(Expression::StringLiteral(String::new()));
2403    }
2404
2405    // SAFETY: Tokenize the extent
2406    let mut tokens = ptr::null_mut();
2407    let mut num_tokens = 0;
2408
2409    unsafe {
2410        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
2411    }
2412
2413    let mut value = String::new();
2414
2415    if num_tokens > 0 {
2416        // SAFETY: Get the spelling of the first token
2417        unsafe {
2418            let token_cxstring = clang_getTokenSpelling(tu, *tokens);
2419            let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
2420            if let Ok(token_str) = c_str.to_str() {
2421                // Remove surrounding quotes from string literal
2422                value = token_str.trim_matches('"').to_string();
2423            }
2424            clang_disposeString(token_cxstring);
2425
2426            // SAFETY: Dispose tokens
2427            clang_disposeTokens(tu, tokens, num_tokens);
2428        }
2429    }
2430
2431    Some(Expression::StringLiteral(value))
2432}
2433
2434/// Extract a character literal expression.
2435/// Handles plain characters ('a'), escape sequences ('\0', '\n', '\t', etc.)
2436fn extract_char_literal(cursor: CXCursor) -> Option<Expression> {
2437    // SAFETY: Get the extent (source range) of the cursor
2438    let extent = unsafe { clang_getCursorExtent(cursor) };
2439
2440    // SAFETY: Get the translation unit from the cursor
2441    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
2442
2443    if tu.is_null() {
2444        return Some(Expression::CharLiteral(0));
2445    }
2446
2447    // SAFETY: Tokenize the extent
2448    let mut tokens = ptr::null_mut();
2449    let mut num_tokens = 0;
2450
2451    unsafe {
2452        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
2453    }
2454
2455    let mut value: i8 = 0;
2456
2457    if num_tokens > 0 {
2458        // SAFETY: Get the spelling of the first token
2459        unsafe {
2460            let token_cxstring = clang_getTokenSpelling(tu, *tokens);
2461            let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
2462            if let Ok(token_str) = c_str.to_str() {
2463                // Remove surrounding quotes from character literal
2464                let inner = token_str.trim_matches('\'');
2465                value = parse_char_literal(inner);
2466            }
2467            clang_disposeString(token_cxstring);
2468
2469            // SAFETY: Dispose tokens
2470            clang_disposeTokens(tu, tokens, num_tokens);
2471        }
2472    }
2473
2474    Some(Expression::CharLiteral(value))
2475}
2476
/// Parse a character literal string (without quotes) into its i8 value.
///
/// Handles the simple escapes `\n \t \r \\ \' \" \? \a \b \f \v`, hexadecimal
/// escapes (`\xNN`, up to two digits) and octal escapes (`\N`, `\NN`, `\NNN`).
/// Escape values of 0x80 and above are reinterpreted as a negative `i8`
/// (`\xff` and `\377` both give `-1`). For an unrecognized escape the escaped
/// character itself is returned.
///
/// Returns `0` for an empty string and for a hex escape without valid digits.
/// A plain character is truncated to its low 8 bits.
fn parse_char_literal(s: &str) -> i8 {
    let mut chars = s.chars();
    let first = match chars.next() {
        Some(c) => c,
        None => return 0,
    };

    if first != '\\' {
        // Plain character
        return first as i8;
    }

    // Escape sequence
    match chars.next() {
        Some('n') => b'\n' as i8,
        Some('t') => b'\t' as i8,
        Some('r') => b'\r' as i8,
        Some('\\') => b'\\' as i8,
        Some('\'') => b'\'' as i8,
        Some('"') => b'"' as i8,
        Some('?') => b'?' as i8,
        Some('a') => 7,  // bell
        Some('b') => 8,  // backspace
        Some('f') => 12, // form feed
        Some('v') => 11, // vertical tab
        Some('x') => {
            // Hex escape: \xNN. Parse as unsigned so 0x80..=0xFF do not
            // overflow, then reinterpret the byte as a signed char.
            let hex: String = chars.take(2).collect();
            u8::from_str_radix(&hex, 16)
                .map(|byte| byte as i8)
                .unwrap_or(0)
        }
        Some(c @ '0'..='7') => {
            // Octal escape: one to three octal digits (covers `\0` as well).
            let mut value = c.to_digit(8).unwrap_or(0);
            for _ in 0..2 {
                match chars.next().and_then(|d| d.to_digit(8)) {
                    Some(digit) => value = value * 8 + digit,
                    None => break,
                }
            }
            // Keep the low byte and reinterpret it as a signed char.
            value as u8 as i8
        }
        // Unrecognized escape: keep the escaped character.
        Some(other) => other as i8,
        // A lone backslash with nothing after it.
        None => first as i8,
    }
}
2528
2529/// Extract a variable reference expression.
2530fn extract_variable_ref(cursor: CXCursor) -> Option<Expression> {
2531    // Get variable name
2532    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
2533    let name = unsafe {
2534        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
2535        let var_name = c_str.to_string_lossy().into_owned();
2536        clang_disposeString(name_cxstring);
2537        var_name
2538    };
2539
2540    Some(Expression::Variable(name))
2541}
2542
2543/// Extract a binary operation expression.
2544fn extract_binary_op(cursor: CXCursor) -> Option<Expression> {
2545    // Extract operator by tokenizing
2546    let op = extract_binary_operator(cursor)?;
2547
2548    // Extract left and right operands by visiting children
2549    let mut operands: Vec<Expression> = Vec::new();
2550    let operands_ptr = &mut operands as *mut Vec<Expression>;
2551
2552    unsafe {
2553        clang_visitChildren(cursor, visit_binary_operand, operands_ptr as CXClientData);
2554    }
2555
2556    // Binary operators should have exactly 2 operands
2557    if operands.len() != 2 {
2558        return None;
2559    }
2560
2561    Some(Expression::BinaryOp {
2562        op,
2563        left: Box::new(operands[0].clone()),
2564        right: Box::new(operands[1].clone()),
2565    })
2566}
2567
2568/// Visitor callback for binary operator operands.
2569#[allow(non_upper_case_globals)]
2570extern "C" fn visit_binary_operand(
2571    cursor: CXCursor,
2572    _parent: CXCursor,
2573    client_data: CXClientData,
2574) -> CXChildVisitResult {
2575    let operands = unsafe { &mut *(client_data as *mut Vec<Expression>) };
2576    let kind = unsafe { clang_getCursorKind(cursor) };
2577
2578    match kind {
2579        CXCursor_IntegerLiteral => {
2580            if let Some(expr) = extract_int_literal(cursor) {
2581                operands.push(expr);
2582            }
2583            CXChildVisit_Continue
2584        }
2585        107 => {
2586            // Floating-point literal (CXCursor_FloatingLiteral)
2587            if let Some(expr) = extract_float_literal(cursor) {
2588                operands.push(expr);
2589            }
2590            CXChildVisit_Continue
2591        }
2592        CXCursor_StringLiteral => {
2593            if let Some(expr) = extract_string_literal(cursor) {
2594                operands.push(expr);
2595            }
2596            CXChildVisit_Continue
2597        }
2598        110 => {
2599            // Character literal (CXCursor_CharacterLiteral)
2600            if let Some(expr) = extract_char_literal(cursor) {
2601                operands.push(expr);
2602            }
2603            CXChildVisit_Continue
2604        }
2605        CXCursor_DeclRefExpr => {
2606            if let Some(expr) = extract_variable_ref(cursor) {
2607                operands.push(expr);
2608            }
2609            CXChildVisit_Continue
2610        }
2611        CXCursor_BinaryOperator => {
2612            // Nested binary operation
2613            if let Some(expr) = extract_binary_op(cursor) {
2614                operands.push(expr);
2615            }
2616            CXChildVisit_Continue
2617        }
2618        CXCursor_UnaryOperator => {
2619            // Unary operation (e.g., *ptr dereference)
2620            if let Some(expr) = extract_unary_op(cursor) {
2621                operands.push(expr);
2622            }
2623            CXChildVisit_Continue
2624        }
2625        CXCursor_ArraySubscriptExpr => {
2626            // Array indexing (e.g., arr[i])
2627            if let Some(expr) = extract_array_index(cursor) {
2628                operands.push(expr);
2629            }
2630            CXChildVisit_Continue
2631        }
2632        CXCursor_MemberRefExpr => {
2633            // Field access (e.g., ptr->field or obj.field)
2634            if let Some(expr) = extract_field_access(cursor) {
2635                operands.push(expr);
2636            }
2637            CXChildVisit_Continue
2638        }
2639        CXCursor_UnexposedExpr | CXCursor_ParenExpr => {
2640            // Unexposed expressions might be sizeof or wrap other expressions
2641            if let Some(expr) = extract_sizeof(cursor) {
2642                operands.push(expr);
2643                CXChildVisit_Continue
2644            } else {
2645                CXChildVisit_Recurse
2646            }
2647        }
2648        136 => {
2649            // CXCursor_UnaryExpr - includes sizeof, alignof, etc.
2650            if let Some(expr) = extract_sizeof(cursor) {
2651                operands.push(expr);
2652                CXChildVisit_Continue
2653            } else {
2654                CXChildVisit_Recurse
2655            }
2656        }
2657        CXCursor_CallExpr => {
2658            // Function call expression (e.g., malloc(size))
2659            if let Some(expr) = extract_function_call(cursor) {
2660                operands.push(expr);
2661            }
2662            CXChildVisit_Continue
2663        }
2664        116 => {
2665            // CXCursor_ConditionalOperator (ternary) - DECY-192
2666            if let Some(expr) = extract_conditional_op(cursor) {
2667                operands.push(expr);
2668            }
2669            CXChildVisit_Continue
2670        }
2671        _ => CXChildVisit_Recurse,
2672    }
2673}
2674
2675/// Extract the binary operator from a cursor by tokenizing.
2676#[allow(non_upper_case_globals)]
2677fn extract_binary_operator(cursor: CXCursor) -> Option<BinaryOperator> {
2678    // Get the translation unit
2679    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
2680    if tu.is_null() {
2681        return None;
2682    }
2683
2684    // Get the extent (source range) of the cursor
2685    let extent = unsafe { clang_getCursorExtent(cursor) };
2686
2687    // Tokenize to find the operator
2688    let mut tokens = ptr::null_mut();
2689    let mut num_tokens = 0;
2690
2691    unsafe {
2692        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
2693    }
2694
2695    let mut operator = None;
2696
2697    // Look through tokens to find the operator
2698    // For compound expressions like "a > 0 && b > 0", we need to find the LAST
2699    // operator (the one with lowest precedence) which represents THIS binary operation.
2700    // We scan from right to left to find operators with lowest precedence first.
2701    // Precedence (lowest to highest): || > && > == != > < > <= >= > + - > * / %
2702
2703    let mut candidates: Vec<(usize, BinaryOperator)> = Vec::new();
2704    let mut found_first_operand = false;
2705    let mut paren_depth: i32 = 0; // Track parenthesis nesting depth
2706
2707    for i in 0..num_tokens {
2708        unsafe {
2709            let token = *tokens.add(i as usize);
2710            let token_kind = clang_getTokenKind(token);
2711
2712            // Track when we've seen the first operand (identifier or literal)
2713            if token_kind == CXToken_Identifier || token_kind == CXToken_Literal {
2714                found_first_operand = true;
2715            }
2716
2717            // Track parenthesis depth to avoid operators inside function calls
2718            if token_kind == CXToken_Punctuation {
2719                let token_cxstring = clang_getTokenSpelling(tu, token);
2720                let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
2721                if let Ok(token_str) = c_str.to_str() {
2722                    match token_str {
2723                        "(" => paren_depth += 1,
2724                        ")" => paren_depth = paren_depth.saturating_sub(1),
2725                        _ => {}
2726                    }
2727
2728                    // Only collect operator candidates at depth 0 (outside parentheses)
2729                    // This fixes DECY-116: n * func(n - 1) was picking up the - inside parens
2730                    if found_first_operand && paren_depth == 0 {
2731                        let op = match token_str {
2732                            "+" => Some(BinaryOperator::Add),
2733                            "-" => Some(BinaryOperator::Subtract),
2734                            "*" => Some(BinaryOperator::Multiply),
2735                            "/" => Some(BinaryOperator::Divide),
2736                            "%" => Some(BinaryOperator::Modulo),
2737                            "==" => Some(BinaryOperator::Equal),
2738                            "!=" => Some(BinaryOperator::NotEqual),
2739                            "<" => Some(BinaryOperator::LessThan),
2740                            ">" => Some(BinaryOperator::GreaterThan),
2741                            "<=" => Some(BinaryOperator::LessEqual),
2742                            ">=" => Some(BinaryOperator::GreaterEqual),
2743                            "&&" => Some(BinaryOperator::LogicalAnd),
2744                            "||" => Some(BinaryOperator::LogicalOr),
2745                            // DECY-137: Bitwise and shift operators
2746                            "<<" => Some(BinaryOperator::LeftShift),
2747                            ">>" => Some(BinaryOperator::RightShift),
2748                            "&" => Some(BinaryOperator::BitwiseAnd),
2749                            "|" => Some(BinaryOperator::BitwiseOr),
2750                            "^" => Some(BinaryOperator::BitwiseXor),
2751                            // DECY-195: Assignment operator for embedded assignments like (c=getchar())
2752                            "=" => Some(BinaryOperator::Assign),
2753                            _ => None,
2754                        };
2755                        if let Some(op) = op {
2756                            candidates.push((i as usize, op));
2757                        }
2758                    }
2759                }
2760                clang_disposeString(token_cxstring);
2761            }
2762        }
2763    }
2764
2765    // Select the operator with lowest precedence (appears last in our search)
2766    // This handles cases like "a > 0 && b > 0" where && should be selected over >
2767    // C precedence (low to high): = > || > && > | > ^ > & > == != > < > <= >= > << >> > + - > * / %
2768    if !candidates.is_empty() {
2769        // DECY-195: Assignment has lowest precedence
2770        for (_, op) in &candidates {
2771            if matches!(op, BinaryOperator::Assign) {
2772                operator = Some(*op);
2773                break;
2774            }
2775        }
2776        // Find the first || operator (next lowest precedence)
2777        if operator.is_none() {
2778            for (_, op) in &candidates {
2779                if matches!(op, BinaryOperator::LogicalOr) {
2780                    operator = Some(*op);
2781                    break;
2782                }
2783            }
2784        }
2785        // If no ||, find first &&
2786        if operator.is_none() {
2787            for (_, op) in &candidates {
2788                if matches!(op, BinaryOperator::LogicalAnd) {
2789                    operator = Some(*op);
2790                    break;
2791                }
2792            }
2793        }
2794        // DECY-137: Bitwise OR (|)
2795        if operator.is_none() {
2796            for (_, op) in &candidates {
2797                if matches!(op, BinaryOperator::BitwiseOr) {
2798                    operator = Some(*op);
2799                    break;
2800                }
2801            }
2802        }
2803        // DECY-137: Bitwise XOR (^)
2804        if operator.is_none() {
2805            for (_, op) in &candidates {
2806                if matches!(op, BinaryOperator::BitwiseXor) {
2807                    operator = Some(*op);
2808                    break;
2809                }
2810            }
2811        }
2812        // DECY-137: Bitwise AND (&)
2813        if operator.is_none() {
2814            for (_, op) in &candidates {
2815                if matches!(op, BinaryOperator::BitwiseAnd) {
2816                    operator = Some(*op);
2817                    break;
2818                }
2819            }
2820        }
2821        // Equality operators (==, !=)
2822        if operator.is_none() {
2823            for (_, op) in &candidates {
2824                if matches!(op, BinaryOperator::Equal | BinaryOperator::NotEqual) {
2825                    operator = Some(*op);
2826                    break;
2827                }
2828            }
2829        }
2830        // Relational operators (<, >, <=, >=)
2831        if operator.is_none() {
2832            for (_, op) in &candidates {
2833                if matches!(
2834                    op,
2835                    BinaryOperator::LessThan
2836                        | BinaryOperator::GreaterThan
2837                        | BinaryOperator::LessEqual
2838                        | BinaryOperator::GreaterEqual
2839                ) {
2840                    operator = Some(*op);
2841                    break;
2842                }
2843            }
2844        }
2845        // DECY-137: Shift operators (<<, >>)
2846        if operator.is_none() {
2847            for (_, op) in &candidates {
2848                if matches!(op, BinaryOperator::LeftShift | BinaryOperator::RightShift) {
2849                    operator = Some(*op);
2850                    break;
2851                }
2852            }
2853        }
2854        // Additive operators (+, -)
2855        if operator.is_none() {
2856            for (_, op) in &candidates {
2857                if matches!(op, BinaryOperator::Add | BinaryOperator::Subtract) {
2858                    operator = Some(*op);
2859                    break;
2860                }
2861            }
2862        }
2863        // If no additive, take first multiplicative operator (*, /, %)
2864        if operator.is_none() {
2865            operator = Some(candidates[0].1);
2866        }
2867    }
2868
2869    unsafe {
2870        clang_disposeTokens(tu, tokens, num_tokens);
2871    }
2872
2873    operator
2874}
2875
2876/// Extract a function call expression.
2877fn extract_function_call(cursor: CXCursor) -> Option<Expression> {
2878    // Get function name
2879    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
2880    let function = unsafe {
2881        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
2882        let name = c_str.to_string_lossy().into_owned();
2883        clang_disposeString(name_cxstring);
2884        name
2885    };
2886
2887    // Extract arguments by visiting children
2888    // We use a struct to track if we've seen the function reference yet
2889    #[repr(C)]
2890    struct ArgData {
2891        arguments: Vec<Expression>,
2892        skip_first_declref: bool,
2893    }
2894
2895    let mut arg_data = ArgData {
2896        arguments: Vec::new(),
2897        skip_first_declref: true, // Skip the first DeclRefExpr (function name)
2898    };
2899    let args_ptr = &mut arg_data as *mut ArgData;
2900
2901    unsafe {
2902        clang_visitChildren(cursor, visit_call_argument, args_ptr as CXClientData);
2903    }
2904
2905    Some(Expression::FunctionCall {
2906        function,
2907        arguments: arg_data.arguments,
2908    })
2909}
2910
2911/// Visitor callback for function call arguments.
2912///
2913/// # Safety
2914///
2915/// This function is called by clang_visitChildren and must follow C calling conventions.
2916#[allow(non_upper_case_globals)]
2917extern "C" fn visit_call_argument(
2918    cursor: CXCursor,
2919    _parent: CXCursor,
2920    client_data: CXClientData,
2921) -> CXChildVisitResult {
2922    #[repr(C)]
2923    struct ArgData {
2924        arguments: Vec<Expression>,
2925        skip_first_declref: bool,
2926    }
2927
2928    // SAFETY: Converting client data back to ArgData pointer
2929    let arg_data = unsafe { &mut *(client_data as *mut ArgData) };
2930
2931    // SAFETY: Getting cursor kind
2932    let kind = unsafe { clang_getCursorKind(cursor) };
2933
2934    match kind {
2935        CXCursor_IntegerLiteral => {
2936            if let Some(expr) = extract_int_literal(cursor) {
2937                arg_data.arguments.push(expr);
2938            }
2939            CXChildVisit_Continue
2940        }
2941        107 => {
2942            // Floating-point literal (CXCursor_FloatingLiteral)
2943            if let Some(expr) = extract_float_literal(cursor) {
2944                arg_data.arguments.push(expr);
2945            }
2946            CXChildVisit_Continue
2947        }
2948        CXCursor_StringLiteral => {
2949            if let Some(expr) = extract_string_literal(cursor) {
2950                arg_data.arguments.push(expr);
2951            }
2952            CXChildVisit_Continue
2953        }
2954        110 => {
2955            // Character literal (CXCursor_CharacterLiteral)
2956            if let Some(expr) = extract_char_literal(cursor) {
2957                arg_data.arguments.push(expr);
2958            }
2959            CXChildVisit_Continue
2960        }
2961        CXCursor_DeclRefExpr => {
2962            // Variable reference argument
2963            // The first DeclRefExpr is the function being called, skip it
2964            if arg_data.skip_first_declref {
2965                arg_data.skip_first_declref = false;
2966                CXChildVisit_Continue
2967            } else {
2968                if let Some(expr) = extract_variable_ref(cursor) {
2969                    arg_data.arguments.push(expr);
2970                }
2971                CXChildVisit_Continue
2972            }
2973        }
2974        CXCursor_BinaryOperator => {
2975            // Binary operation in argument (e.g., x + 1, y * 2)
2976            if let Some(expr) = extract_binary_op(cursor) {
2977                arg_data.arguments.push(expr);
2978            }
2979            CXChildVisit_Continue
2980        }
2981        CXCursor_CallExpr => {
2982            // Nested function call (e.g., add(add(x, 5), add(10, 20)))
2983            if let Some(expr) = extract_function_call(cursor) {
2984                arg_data.arguments.push(expr);
2985            }
2986            CXChildVisit_Continue
2987        }
2988        CXCursor_UnaryOperator => {
2989            // Unary operation in argument (e.g., -x, !flag)
2990            if let Some(expr) = extract_unary_op(cursor) {
2991                arg_data.arguments.push(expr);
2992            }
2993            CXChildVisit_Continue
2994        }
2995        CXCursor_ArraySubscriptExpr => {
2996            // Array indexing in argument (e.g., arr[i])
2997            if let Some(expr) = extract_array_index(cursor) {
2998                arg_data.arguments.push(expr);
2999            }
3000            CXChildVisit_Continue
3001        }
3002        CXCursor_MemberRefExpr => {
3003            // Field access in argument (e.g., ptr->field or obj.field)
3004            if let Some(expr) = extract_field_access(cursor) {
3005                arg_data.arguments.push(expr);
3006            }
3007            CXChildVisit_Continue
3008        }
3009        116 => {
3010            // CXCursor_ConditionalOperator (ternary) - DECY-192
3011            if let Some(expr) = extract_conditional_op(cursor) {
3012                arg_data.arguments.push(expr);
3013            }
3014            CXChildVisit_Continue
3015        }
3016        CXCursor_UnexposedExpr | CXCursor_ParenExpr => {
3017            // Unexposed expressions might wrap actual expressions or be sizeof, try to extract
3018            if let Some(expr) = extract_sizeof(cursor) {
3019                arg_data.arguments.push(expr);
3020                CXChildVisit_Continue
3021            } else {
3022                CXChildVisit_Recurse
3023            }
3024        }
3025        136 => {
3026            // CXCursor_UnaryExpr - includes sizeof, alignof, etc.
3027            if let Some(expr) = extract_sizeof(cursor) {
3028                arg_data.arguments.push(expr);
3029                CXChildVisit_Continue
3030            } else {
3031                CXChildVisit_Recurse
3032            }
3033        }
3034        _ => CXChildVisit_Continue, // Skip other unknown children
3035    }
3036}
3037
3038/// Extract a unary operator expression.
3039fn extract_unary_op(cursor: CXCursor) -> Option<Expression> {
3040    // Get the translation unit
3041    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
3042    if tu.is_null() {
3043        return None;
3044    }
3045
3046    // Get the extent (source range) of the cursor
3047    let extent = unsafe { clang_getCursorExtent(cursor) };
3048
3049    // Tokenize to find the operator
3050    let mut tokens = ptr::null_mut();
3051    let mut num_tokens = 0;
3052
3053    unsafe {
3054        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
3055    }
3056
3057    let mut operator: Option<UnaryOperator> = None;
3058    let mut is_dereference = false;
3059    let mut is_increment = false;
3060    let mut is_decrement = false;
3061    let mut operator_position = 0;
3062
3063    // Look through tokens to find the unary operator
3064    for i in 0..num_tokens {
3065        unsafe {
3066            let token = *tokens.add(i as usize);
3067            let token_kind = clang_getTokenKind(token);
3068
3069            if token_kind == CXToken_Punctuation {
3070                let token_cxstring = clang_getTokenSpelling(tu, token);
3071                let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
3072                if let Ok(token_str) = c_str.to_str() {
3073                    match token_str {
3074                        "*" => {
3075                            is_dereference = true;
3076                            clang_disposeString(token_cxstring);
3077                            break;
3078                        }
3079                        "-" => {
3080                            operator = Some(UnaryOperator::Minus);
3081                            clang_disposeString(token_cxstring);
3082                            break;
3083                        }
3084                        "!" => {
3085                            operator = Some(UnaryOperator::LogicalNot);
3086                            clang_disposeString(token_cxstring);
3087                            break;
3088                        }
3089                        "~" => {
3090                            operator = Some(UnaryOperator::BitwiseNot);
3091                            clang_disposeString(token_cxstring);
3092                            break;
3093                        }
3094                        "&" => {
3095                            operator = Some(UnaryOperator::AddressOf);
3096                            clang_disposeString(token_cxstring);
3097                            break;
3098                        }
3099                        "++" => {
3100                            is_increment = true;
3101                            operator_position = i;
3102                            clang_disposeString(token_cxstring);
3103                            break;
3104                        }
3105                        "--" => {
3106                            is_decrement = true;
3107                            operator_position = i;
3108                            clang_disposeString(token_cxstring);
3109                            break;
3110                        }
3111                        _ => {}
3112                    }
3113                }
3114                clang_disposeString(token_cxstring);
3115            }
3116        }
3117    }
3118
3119    unsafe {
3120        clang_disposeTokens(tu, tokens, num_tokens);
3121    }
3122
3123    // Extract the operand
3124    let mut operand: Option<Expression> = None;
3125    let operand_ptr = &mut operand as *mut Option<Expression>;
3126
3127    unsafe {
3128        clang_visitChildren(cursor, visit_expression, operand_ptr as CXClientData);
3129    }
3130
3131    let operand_expr = operand?;
3132
3133    // Handle dereference separately (maintains backward compatibility)
3134    if is_dereference {
3135        return Some(Expression::Dereference(Box::new(operand_expr)));
3136    }
3137
3138    // Handle increment/decrement operators
3139    if is_increment {
3140        // Check if pre or post increment
3141        let is_pre = operator_position == 0;
3142        if is_pre {
3143            return Some(Expression::PreIncrement {
3144                operand: Box::new(operand_expr),
3145            });
3146        } else {
3147            return Some(Expression::PostIncrement {
3148                operand: Box::new(operand_expr),
3149            });
3150        }
3151    }
3152
3153    if is_decrement {
3154        // Check if pre or post decrement
3155        let is_pre = operator_position == 0;
3156        if is_pre {
3157            return Some(Expression::PreDecrement {
3158                operand: Box::new(operand_expr),
3159            });
3160        } else {
3161            return Some(Expression::PostDecrement {
3162                operand: Box::new(operand_expr),
3163            });
3164        }
3165    }
3166
3167    // Handle other unary operators
3168    if let Some(op) = operator {
3169        return Some(Expression::UnaryOp {
3170            op,
3171            operand: Box::new(operand_expr),
3172        });
3173    }
3174
3175    // DECY-195: Fallback for system headers where tokenization fails
3176    // If we have a UnaryOperator cursor with an operand but couldn't identify the operator,
3177    // try to infer it from context. For macro expansions like EOF=(-1),
3178    // the unary minus might not be tokenizable.
3179    // Check if the operand is an integer literal - if so, it might be a negation
3180    // For now, return the operand wrapped as unary minus if it's an integer
3181    // This handles the common case of EOF = (-1) from stdio.h
3182    if let Expression::IntLiteral(_) = &operand_expr {
3183        // If we found an integer inside a UnaryOperator, assume it's negation
3184        return Some(Expression::UnaryOp {
3185            op: UnaryOperator::Minus,
3186            operand: Box::new(operand_expr),
3187        });
3188    }
3189
3190    None
3191}
3192
3193/// Extract an array indexing expression.
3194fn extract_array_index(cursor: CXCursor) -> Option<Expression> {
3195    // Extract array and index expressions by visiting children
3196    let mut operands: Vec<Expression> = Vec::new();
3197    let operands_ptr = &mut operands as *mut Vec<Expression>;
3198
3199    unsafe {
3200        clang_visitChildren(cursor, visit_binary_operand, operands_ptr as CXClientData);
3201    }
3202
3203    // Array subscript should have exactly 2 operands: array and index
3204    if operands.len() != 2 {
3205        return None;
3206    }
3207
3208    Some(Expression::ArrayIndex {
3209        array: Box::new(operands[0].clone()),
3210        index: Box::new(operands[1].clone()),
3211    })
3212}
3213
3214/// Extract a field access expression (obj.field or ptr->field).
3215fn extract_field_access(cursor: CXCursor) -> Option<Expression> {
3216    // Get the field name
3217    let field_name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
3218    let field = unsafe {
3219        let c_str = CStr::from_ptr(clang_getCString(field_name_cxstring));
3220        let name = c_str.to_string_lossy().into_owned();
3221        clang_disposeString(field_name_cxstring);
3222        name
3223    };
3224
3225    // Determine if this is -> or . by tokenizing
3226    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
3227    if tu.is_null() {
3228        return None;
3229    }
3230
3231    let extent = unsafe { clang_getCursorExtent(cursor) };
3232    let mut tokens = ptr::null_mut();
3233    let mut num_tokens = 0;
3234
3235    unsafe {
3236        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
3237    }
3238
3239    let mut is_arrow = false;
3240
3241    // Look through tokens to find the LAST '->' or '.' operator
3242    // (the rightmost operator is the one for this specific MemberRefExpr)
3243    // For nested access like r->bottom_right.x, the extent includes all tokens,
3244    // so we need the last operator, not the first
3245    for i in 0..num_tokens {
3246        unsafe {
3247            let token = *tokens.add(i as usize);
3248            let token_kind = clang_getTokenKind(token);
3249
3250            if token_kind == CXToken_Punctuation {
3251                let token_cxstring = clang_getTokenSpelling(tu, token);
3252                let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
3253                if let Ok(token_str) = c_str.to_str() {
3254                    if token_str == "->" {
3255                        is_arrow = true;
3256                        // Don't break - keep looking for later operators
3257                    } else if token_str == "." {
3258                        is_arrow = false;
3259                        // Don't break - keep looking for later operators
3260                    }
3261                }
3262                clang_disposeString(token_cxstring);
3263            }
3264        }
3265    }
3266
3267    unsafe {
3268        clang_disposeTokens(tu, tokens, num_tokens);
3269    }
3270
3271    // Extract the object/pointer expression by visiting children
3272    let mut object_expr: Option<Expression> = None;
3273    let expr_ptr = &mut object_expr as *mut Option<Expression>;
3274
3275    unsafe {
3276        clang_visitChildren(cursor, visit_expression, expr_ptr as CXClientData);
3277    }
3278
3279    let object = object_expr?;
3280
3281    if is_arrow {
3282        Some(Expression::PointerFieldAccess {
3283            pointer: Box::new(object),
3284            field,
3285        })
3286    } else {
3287        Some(Expression::FieldAccess {
3288            object: Box::new(object),
3289            field,
3290        })
3291    }
3292}
3293
3294/// Extract a sizeof expression.
3295/// DECY-119: Only match if sizeof is the FIRST token (not from other statements)
3296fn extract_sizeof(cursor: CXCursor) -> Option<Expression> {
3297    // Get the translation unit
3298    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
3299    if tu.is_null() {
3300        return None;
3301    }
3302
3303    // Get the extent (source range) of the cursor
3304    let extent = unsafe { clang_getCursorExtent(cursor) };
3305
3306    // Tokenize to find "sizeof" keyword
3307    let mut tokens = ptr::null_mut();
3308    let mut num_tokens = 0;
3309
3310    unsafe {
3311        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
3312    }
3313
3314    // DECY-119: sizeof must be the FIRST token, otherwise this cursor
3315    // is not a sizeof expression (it might just contain one elsewhere)
3316    if num_tokens == 0 {
3317        unsafe {
3318            clang_disposeTokens(tu, tokens, num_tokens);
3319        }
3320        return None;
3321    }
3322
3323    let first_token_is_sizeof = unsafe {
3324        let token = *tokens.add(0);
3325        let token_cxstring = clang_getTokenSpelling(tu, token);
3326        let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
3327        let is_sizeof = c_str.to_str().map(|s| s == "sizeof").unwrap_or(false);
3328        clang_disposeString(token_cxstring);
3329        is_sizeof
3330    };
3331
3332    if !first_token_is_sizeof {
3333        unsafe {
3334            clang_disposeTokens(tu, tokens, num_tokens);
3335        }
3336        return None;
3337    }
3338
3339    let mut type_name = String::new();
3340    let mut paren_depth = 0;
3341    let mut in_sizeof_parens = false;
3342
3343    // Look through tokens to extract type name (skip first token which is "sizeof")
3344    // DECY-119: Track paren depth to stop at closing paren
3345    for i in 1..num_tokens {
3346        unsafe {
3347            let token = *tokens.add(i as usize);
3348            let token_kind = clang_getTokenKind(token);
3349            let token_cxstring = clang_getTokenSpelling(tu, token);
3350            let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
3351
3352            if let Ok(token_str) = c_str.to_str() {
3353                if token_str == "(" {
3354                    paren_depth += 1;
3355                    in_sizeof_parens = true;
3356                } else if token_str == ")" {
3357                    paren_depth -= 1;
3358                    // DECY-119: Stop when we close the sizeof parenthesis
3359                    if paren_depth == 0 && in_sizeof_parens {
3360                        clang_disposeString(token_cxstring);
3361                        break;
3362                    }
3363                } else if in_sizeof_parens
3364                    && (token_kind == CXToken_Identifier || token_kind == CXToken_Keyword)
3365                {
3366                    // This is part of the type name (e.g., "int", "Data", "struct")
3367                    if !type_name.is_empty() {
3368                        type_name.push(' ');
3369                    }
3370                    type_name.push_str(token_str);
3371                }
3372            }
3373
3374            clang_disposeString(token_cxstring);
3375        }
3376    }
3377
3378    unsafe {
3379        clang_disposeTokens(tu, tokens, num_tokens);
3380    }
3381
3382    // We already verified first token is sizeof, just check we got a type name
3383    if !type_name.is_empty() {
3384        Some(Expression::Sizeof { type_name })
3385    } else {
3386        None
3387    }
3388}
3389
3390/// Convert clang type to our Type enum.
3391#[allow(non_upper_case_globals)]
3392/// Extract a cast expression from a clang cursor.
3393///
3394/// Parses C-style cast expressions like `(int)x` or `(void*)ptr`.
3395/// Extracts the target type and the expression being cast.
3396fn extract_cast(cursor: CXCursor) -> Option<Expression> {
3397    // SAFETY: Getting the type this expression evaluates to (the cast result type)
3398    let target_cx_type = unsafe { clang_getCursorType(cursor) };
3399    let target_type = convert_type(target_cx_type)?;
3400
3401    // Extract the inner expression by visiting children
3402    let mut inner_expr: Option<Expression> = None;
3403    let inner_ptr = &mut inner_expr as *mut Option<Expression>;
3404
3405    unsafe {
3406        clang_visitChildren(cursor, visit_cast_inner, inner_ptr as CXClientData);
3407    }
3408
3409    inner_expr.map(|expr| Expression::Cast {
3410        target_type,
3411        expr: Box::new(expr),
3412    })
3413}
3414
3415/// Visitor callback to extract the inner expression of a cast.
3416#[allow(non_upper_case_globals)]
3417extern "C" fn visit_cast_inner(
3418    cursor: CXCursor,
3419    _parent: CXCursor,
3420    client_data: CXClientData,
3421) -> CXChildVisitResult {
3422    let inner_expr = unsafe { &mut *(client_data as *mut Option<Expression>) };
3423    let kind = unsafe { clang_getCursorKind(cursor) };
3424
3425    // Try to extract any expression
3426    if let Some(expr) = try_extract_expression(cursor) {
3427        *inner_expr = Some(expr);
3428        return CXChildVisit_Break; // Found the inner expression, stop visiting
3429    }
3430
3431    // For some expression types, we need to recurse
3432    match kind {
3433        CXCursor_UnexposedExpr | CXCursor_ParenExpr => CXChildVisit_Recurse,
3434        _ => CXChildVisit_Continue,
3435    }
3436}
3437
3438/// Extract a compound literal expression from a clang cursor.
3439///
3440/// Parses C99 compound literals like `(struct Point){10, 20}` or `(int[]){1, 2, 3}`.
3441/// Extracts the type and initializer expressions.
3442fn extract_compound_literal(cursor: CXCursor) -> Option<Expression> {
3443    // SAFETY: Getting the type of the compound literal
3444    let literal_cx_type = unsafe { clang_getCursorType(cursor) };
3445    let literal_type = convert_type(literal_cx_type)?;
3446
3447    // Extract initializer expressions by visiting children
3448    let mut initializers: Vec<Expression> = Vec::new();
3449    let initializers_ptr = &mut initializers as *mut Vec<Expression>;
3450
3451    unsafe {
3452        clang_visitChildren(
3453            cursor,
3454            visit_compound_literal_initializers,
3455            initializers_ptr as CXClientData,
3456        );
3457    }
3458
3459    Some(Expression::CompoundLiteral {
3460        literal_type,
3461        initializers,
3462    })
3463}
3464
3465/// DECY-192: Extract a ternary/conditional expression.
3466///
3467/// Parses C conditional expressions like `cond ? then_val : else_val`.
3468/// The ternary operator has 3 children: condition, then expression, else expression.
3469fn extract_conditional_op(cursor: CXCursor) -> Option<Expression> {
3470    // Extract all three operands by visiting children
3471    let mut operands: Vec<Expression> = Vec::new();
3472    let operands_ptr = &mut operands as *mut Vec<Expression>;
3473
3474    unsafe {
3475        clang_visitChildren(cursor, visit_conditional_operand, operands_ptr as CXClientData);
3476    }
3477
3478    // Ternary operators should have exactly 3 operands: condition, then, else
3479    // However, sometimes clang may emit extra implicit expressions
3480    if operands.len() >= 3 {
3481        Some(Expression::Ternary {
3482            condition: Box::new(operands[0].clone()),
3483            then_expr: Box::new(operands[1].clone()),
3484            else_expr: Box::new(operands[2].clone()),
3485        })
3486    } else if operands.len() == 2 {
3487        // GNU extension: `x ?: y` is equivalent to `x ? x : y`
3488        // Clang may represent this with only 2 children
3489        Some(Expression::Ternary {
3490            condition: Box::new(operands[0].clone()),
3491            then_expr: Box::new(operands[0].clone()),
3492            else_expr: Box::new(operands[1].clone()),
3493        })
3494    } else {
3495        None
3496    }
3497}
3498
3499/// Visitor callback for conditional operator (ternary) operands.
3500/// DECY-192: Collects condition, then_expr, and else_expr.
3501#[allow(non_upper_case_globals)]
3502extern "C" fn visit_conditional_operand(
3503    cursor: CXCursor,
3504    _parent: CXCursor,
3505    client_data: CXClientData,
3506) -> CXChildVisitResult {
3507    let operands = unsafe { &mut *(client_data as *mut Vec<Expression>) };
3508
3509    // Try to extract expression using the general expression extractor
3510    if let Some(expr) = try_extract_expression(cursor) {
3511        operands.push(expr);
3512    }
3513
3514    CXChildVisit_Continue
3515}
3516
3517/// DECY-133: Extract an initializer list expression for struct/array initialization.
3518///
3519/// Handles C99 designated initializers like `{.x = 10, .y = 20}` or `{[2] = 100}`.
3520/// Clang resolves designated initializers to positional order and inserts ImplicitValueInitExpr
3521/// for uninitialized fields.
3522fn extract_init_list(cursor: CXCursor) -> Option<Expression> {
3523    // SAFETY: Getting the type of the initializer list
3524    let literal_cx_type = unsafe { clang_getCursorType(cursor) };
3525    let literal_type = convert_type(literal_cx_type)?;
3526
3527    // Extract initializer expressions by visiting children
3528    let mut initializers: Vec<Expression> = Vec::new();
3529    let initializers_ptr = &mut initializers as *mut Vec<Expression>;
3530
3531    unsafe {
3532        clang_visitChildren(
3533            cursor,
3534            visit_init_list_children,
3535            initializers_ptr as CXClientData,
3536        );
3537    }
3538
3539    Some(Expression::CompoundLiteral {
3540        literal_type,
3541        initializers,
3542    })
3543}
3544
3545/// Visitor callback to extract initializers from an InitListExpr.
3546/// DECY-133: Handles both regular and designated initializers.
3547#[allow(non_upper_case_globals)]
3548extern "C" fn visit_init_list_children(
3549    cursor: CXCursor,
3550    _parent: CXCursor,
3551    client_data: CXClientData,
3552) -> CXChildVisitResult {
3553    let initializers = unsafe { &mut *(client_data as *mut Vec<Expression>) };
3554    let kind = unsafe { clang_getCursorKind(cursor) };
3555
3556    // Handle ImplicitValueInitExpr (115) - default value for uninitialized fields
3557    // This appears when designated initializers skip some fields
3558    if kind == 115 {
3559        // Get the type to determine the default value
3560        let cx_type = unsafe { clang_getCursorType(cursor) };
3561        if let Some(var_type) = convert_type(cx_type) {
3562            // Generate appropriate default based on type
3563            let default_expr = match var_type {
3564                Type::Int => Expression::IntLiteral(0),
3565                Type::Float | Type::Double => Expression::IntLiteral(0), // Will be cast
3566                Type::Char => Expression::IntLiteral(0),
3567                _ => Expression::IntLiteral(0), // Fallback
3568            };
3569            initializers.push(default_expr);
3570        }
3571        return CXChildVisit_Continue;
3572    }
3573
3574    // DECY-133b: Handle designated initializers
3575    // Array: [idx] = value  → UnexposedExpr with children [IntLiteral(idx), value]
3576    // Struct: .field = value → UnexposedExpr with children [MemberRef, value]
3577    if kind == CXCursor_UnexposedExpr {
3578        // Collect cursor kinds and expressions for all children
3579        #[repr(C)]
3580        struct ChildInfo {
3581            kinds: Vec<u32>,
3582            exprs: Vec<Expression>,
3583        }
3584
3585        let mut info = ChildInfo {
3586            kinds: Vec::new(),
3587            exprs: Vec::new(),
3588        };
3589        let info_ptr = &mut info as *mut ChildInfo;
3590
3591        extern "C" fn collect_child_info(
3592            cursor: CXCursor,
3593            _parent: CXCursor,
3594            client_data: CXClientData,
3595        ) -> CXChildVisitResult {
3596            let info = unsafe { &mut *(client_data as *mut ChildInfo) };
3597            let kind = unsafe { clang_getCursorKind(cursor) };
3598            info.kinds.push(kind as u32);
3599
3600            // Try to extract expression, including InitListExpr
3601            if kind == 119 {
3602                // InitListExpr - extract as CompoundLiteral
3603                if let Some(expr) = extract_init_list(cursor) {
3604                    info.exprs.push(expr);
3605                }
3606            } else if let Some(expr) = try_extract_expression(cursor) {
3607                info.exprs.push(expr);
3608            }
3609            CXChildVisit_Continue
3610        }
3611
3612        unsafe {
3613            clang_visitChildren(cursor, collect_child_info, info_ptr as CXClientData);
3614        }
3615
3616        // Array designated init: [idx] = value → 2 children, first is IntLiteral
3617        if info.exprs.len() == 2 && matches!(&info.exprs[0], Expression::IntLiteral(_)) {
3618            initializers.push(info.exprs[1].clone());
3619            return CXChildVisit_Continue;
3620        }
3621
3622        // Struct field designated init: .field = value → first kind is MemberRef (47)
3623        // Second child is the value (could be InitListExpr or other expression)
3624        if info.kinds.len() == 2 && info.kinds[0] == 47 && !info.exprs.is_empty() {
3625            // Take the last expression (the value)
3626            initializers.push(info.exprs.last().unwrap().clone());
3627            return CXChildVisit_Continue;
3628        }
3629
3630        // Not a designated initializer - fall through to recursion
3631        return CXChildVisit_Recurse;
3632    }
3633
3634    // Try to extract any expression as an initializer
3635    if let Some(expr) = try_extract_expression(cursor) {
3636        initializers.push(expr);
3637        return CXChildVisit_Continue;
3638    }
3639
3640    // For some expression types, recurse
3641    match kind {
3642        CXCursor_ParenExpr => CXChildVisit_Recurse,
3643        _ => CXChildVisit_Continue,
3644    }
3645}
3646
3647/// Visitor callback to extract initializers from a compound literal.
3648#[allow(non_upper_case_globals)]
3649extern "C" fn visit_compound_literal_initializers(
3650    cursor: CXCursor,
3651    _parent: CXCursor,
3652    client_data: CXClientData,
3653) -> CXChildVisitResult {
3654    let initializers = unsafe { &mut *(client_data as *mut Vec<Expression>) };
3655    let kind = unsafe { clang_getCursorKind(cursor) };
3656
3657    // The compound literal typically has an InitListExpr child
3658    // CXCursor_InitListExpr = 119
3659    if kind == 119 {
3660        // This is the initializer list - visit its children to get individual initializers
3661        return CXChildVisit_Recurse;
3662    }
3663
3664    // Try to extract any expression as an initializer
3665    if let Some(expr) = try_extract_expression(cursor) {
3666        initializers.push(expr);
3667        return CXChildVisit_Continue;
3668    }
3669
3670    // For some expression types, recurse
3671    match kind {
3672        CXCursor_UnexposedExpr | CXCursor_ParenExpr => CXChildVisit_Recurse,
3673        _ => CXChildVisit_Continue,
3674    }
3675}
3676
3677#[allow(non_upper_case_globals)]
3678fn convert_type(cx_type: CXType) -> Option<Type> {
3679    // SAFETY: Getting type kind
3680    match cx_type.kind {
3681        CXType_Void => Some(Type::Void),
3682        CXType_Int => Some(Type::Int),
3683        CXType_UInt => Some(Type::UnsignedInt), // DECY-158: unsigned int → u32
3684        CXType_UChar => Some(Type::Char),       // unsigned char → u8 (DECY-057 fix)
3685        CXType_UShort => Some(Type::UnsignedInt), // unsigned short → u32 (safe approximation)
3686        CXType_ULong => Some(Type::UnsignedInt), // unsigned long → u32 (safe approximation)
3687        CXType_Short => Some(Type::Int),        // short → i32
3688        CXType_Long => Some(Type::Int),         // long → i32
3689        CXType_LongLong => Some(Type::Int),     // long long → i32 (simplified)
3690        CXType_ULongLong => Some(Type::UnsignedInt), // DECY-158: unsigned long long → u32
3691        CXType_Float => Some(Type::Float),
3692        CXType_Double => Some(Type::Double),
3693        CXType_Char_S | CXType_Char_U => Some(Type::Char),
3694        CXType_Pointer => {
3695            // SAFETY: Getting pointee type
3696            let pointee = unsafe { clang_getPointeeType(cx_type) };
3697
3698            // Check if the pointee is a function - this is a function pointer
3699            if pointee.kind == CXType_FunctionProto || pointee.kind == CXType_FunctionNoProto {
3700                // This is a function pointer type
3701                // Extract return type
3702                let return_cx_type = unsafe { clang_getResultType(pointee) };
3703                let return_type = convert_type(return_cx_type)?;
3704
3705                // Extract parameter types
3706                let num_args = unsafe { clang_getNumArgTypes(pointee) };
3707                let mut param_types = Vec::new();
3708
3709                for i in 0..num_args {
3710                    let arg_type = unsafe { clang_getArgType(pointee, i as u32) };
3711                    if let Some(param_type) = convert_type(arg_type) {
3712                        param_types.push(param_type);
3713                    }
3714                }
3715
3716                return Some(Type::FunctionPointer {
3717                    param_types,
3718                    return_type: Box::new(return_type),
3719                });
3720            }
3721
3722            // Regular pointer (not function pointer)
3723            convert_type(pointee).map(|t| Type::Pointer(Box::new(t)))
3724        }
3725        CXType_FunctionProto | CXType_FunctionNoProto => {
3726            // Function type (not a pointer to function, but the function type itself)
3727            // This can occur in typedefs like: typedef int Func(int);
3728            // Extract return type
3729            let return_cx_type = unsafe { clang_getResultType(cx_type) };
3730            let return_type = convert_type(return_cx_type)?;
3731
3732            // Extract parameter types
3733            let num_args = unsafe { clang_getNumArgTypes(cx_type) };
3734            let mut param_types = Vec::new();
3735
3736            for i in 0..num_args {
3737                let arg_type = unsafe { clang_getArgType(cx_type, i as u32) };
3738                if let Some(param_type) = convert_type(arg_type) {
3739                    param_types.push(param_type);
3740                }
3741            }
3742
3743            Some(Type::FunctionPointer {
3744                param_types,
3745                return_type: Box::new(return_type),
3746            })
3747        }
3748        CXType_Record => {
3749            // SAFETY: Getting type declaration to extract struct name
3750            let decl = unsafe { clang_getTypeDeclaration(cx_type) };
3751            let name_cxstring = unsafe { clang_getCursorSpelling(decl) };
3752            let name = unsafe {
3753                let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
3754                let struct_name = c_str.to_string_lossy().into_owned();
3755                clang_disposeString(name_cxstring);
3756                struct_name
3757            };
3758            Some(Type::Struct(name))
3759        }
3760        CXType_Elaborated => {
3761            // Elaborated types wrap other types (e.g., "struct Point" wraps the Record type)
3762            // Get the canonical type to unwrap it
3763            let canonical = unsafe { clang_getCanonicalType(cx_type) };
3764            convert_type(canonical)
3765        }
3766        CXType_Typedef => {
3767            // DECY-172: Get typedef name first to check for known type aliases
3768            let typedef_decl = unsafe { clang_getTypeDeclaration(cx_type) };
3769            let typedef_name_cxstring = unsafe { clang_getCursorSpelling(typedef_decl) };
3770            let typedef_name = unsafe {
3771                let c_str = CStr::from_ptr(clang_getCString(typedef_name_cxstring));
3772                let tn = c_str.to_string_lossy().into_owned();
3773                clang_disposeString(typedef_name_cxstring);
3774                tn
3775            };
3776
3777            // DECY-172: Preserve size_t, ssize_t, ptrdiff_t as TypeAlias
3778            // These need to map to usize/isize in Rust for compatibility with .len() etc.
3779            match typedef_name.as_str() {
3780                "size_t" | "ssize_t" | "ptrdiff_t" => {
3781                    return Some(Type::TypeAlias(typedef_name));
3782                }
3783                _ => {}
3784            }
3785
3786            // DECY-147: For typedefs to anonymous structs, use typedef name as struct name
3787            // Example: typedef struct { int x; } Point; → Type::Struct("Point")
3788            let canonical = unsafe { clang_getCanonicalType(cx_type) };
3789
3790            // Check if this is a typedef to an anonymous struct
3791            if canonical.kind == CXType_Record {
3792                let decl = unsafe { clang_getTypeDeclaration(canonical) };
3793                let struct_name_cxstring = unsafe { clang_getCursorSpelling(decl) };
3794                let struct_name = unsafe {
3795                    let c_str = CStr::from_ptr(clang_getCString(struct_name_cxstring));
3796                    let sn = c_str.to_string_lossy().into_owned();
3797                    clang_disposeString(struct_name_cxstring);
3798                    sn
3799                };
3800
3801                // If struct is anonymous, use the typedef name instead
3802                if struct_name.is_empty() {
3803                    return Some(Type::Struct(typedef_name));
3804                }
3805            }
3806
3807            // Default: recursively convert the canonical type
3808            convert_type(canonical)
3809        }
3810        CXType_ConstantArray => {
3811            // Array type - extract element type and size
3812            let element_cx_type = unsafe { clang_getArrayElementType(cx_type) };
3813            let element_type = convert_type(element_cx_type)?;
3814
3815            // Get array size
3816            let array_size = unsafe { clang_getArraySize(cx_type) };
3817            let size = if array_size >= 0 {
3818                Some(array_size)
3819            } else {
3820                None
3821            };
3822
3823            Some(Type::Array {
3824                element_type: Box::new(element_type),
3825                size,
3826            })
3827        }
3828        114 => {
3829            // CXType_IncompleteArray - flexible array member (C99 §6.7.2.1)
3830            // DECY-136: char data[] → Vec<u8>
3831            // Flexible array members have no size specified
3832            let element_cx_type = unsafe { clang_getArrayElementType(cx_type) };
3833            let element_type = convert_type(element_cx_type)?;
3834
3835            // Generate as Array with size None (will be transformed to Vec in codegen)
3836            Some(Type::Array {
3837                element_type: Box::new(element_type),
3838                size: None,
3839            })
3840        }
3841        _ => None,
3842    }
3843}
3844
3845/// Represents a single case in a switch statement.
3846#[derive(Debug, Clone, PartialEq)]
3847pub struct SwitchCase {
3848    /// Case value expression (None for default case)
3849    pub value: Option<Expression>,
3850    /// Statements to execute for this case
3851    pub body: Vec<Statement>,
3852}
3853
3854/// Represents a C statement.
3855#[derive(Debug, Clone, PartialEq)]
3856pub enum Statement {
3857    /// Variable declaration: `int* ptr = malloc(4);`
3858    VariableDeclaration {
3859        /// Variable name
3860        name: String,
3861        /// Variable type
3862        var_type: Type,
3863        /// Optional initializer expression
3864        initializer: Option<Expression>,
3865    },
3866    /// Return statement: `return expr;`
3867    Return(Option<Expression>),
3868    /// Assignment statement: `x = 42;`
3869    Assignment {
3870        /// Target variable name
3871        target: String,
3872        /// Value expression to assign
3873        value: Expression,
3874    },
3875    /// If statement: `if (cond) { ... } else { ... }`
3876    If {
3877        /// Condition expression
3878        condition: Expression,
3879        /// Then block
3880        then_block: Vec<Statement>,
3881        /// Optional else block
3882        else_block: Option<Vec<Statement>>,
3883    },
3884    /// For loop: `for (init; cond; inc) { ... }`
3885    For {
3886        /// Optional init statement
3887        init: Option<Box<Statement>>,
3888        /// Optional condition expression
3889        condition: Option<Expression>,
3890        /// Optional increment statement
3891        increment: Option<Box<Statement>>,
3892        /// Loop body
3893        body: Vec<Statement>,
3894    },
3895    /// While loop: `while (cond) { ... }`
3896    While {
3897        /// Condition expression
3898        condition: Expression,
3899        /// Loop body
3900        body: Vec<Statement>,
3901    },
3902    /// Pointer dereference assignment: `*ptr = value;`
3903    DerefAssignment {
3904        /// Target expression to dereference
3905        target: Expression,
3906        /// Value expression to assign
3907        value: Expression,
3908    },
3909    /// Array index assignment: `arr[i] = value;`
3910    ArrayIndexAssignment {
3911        /// Array expression
3912        array: Box<Expression>,
3913        /// Index expression
3914        index: Box<Expression>,
3915        /// Value expression to assign
3916        value: Expression,
3917    },
3918    /// Field assignment: `ptr->field = value;` or `obj.field = value;`
3919    FieldAssignment {
3920        /// Object/pointer expression
3921        object: Expression,
3922        /// Field name
3923        field: String,
3924        /// Value expression to assign
3925        value: Expression,
3926    },
3927    /// Break statement: `break;`
3928    Break,
3929    /// Continue statement: `continue;`
3930    Continue,
3931    /// Switch statement: `switch (expr) { case 1: ...; default: ...; }`
3932    Switch {
3933        /// Condition expression to switch on
3934        condition: Expression,
3935        /// List of case statements
3936        cases: Vec<SwitchCase>,
3937        /// Optional default case body
3938        default_case: Option<Vec<Statement>>,
3939    },
3940    /// Post-increment statement: `ptr++;`
3941    PostIncrement {
3942        /// Target variable name
3943        target: String,
3944    },
3945    /// Pre-increment statement: `++ptr;`
3946    PreIncrement {
3947        /// Target variable name
3948        target: String,
3949    },
3950    /// Post-decrement statement: `ptr--;`
3951    PostDecrement {
3952        /// Target variable name
3953        target: String,
3954    },
3955    /// Pre-decrement statement: `--ptr;`
3956    PreDecrement {
3957        /// Target variable name
3958        target: String,
3959    },
3960    /// Compound assignment: `ptr += offset;`, `x *= 2;`, etc.
3961    CompoundAssignment {
3962        /// Target variable name
3963        target: String,
3964        /// Binary operator to apply
3965        op: BinaryOperator,
3966        /// Value expression
3967        value: Expression,
3968    },
3969    /// DECY-185: Compound assignment to expression target: `*ptr *= 2;`, `sb->capacity *= 2;`
3970    /// Used when target is not a simple variable (Dereference, PointerFieldAccess, etc.)
3971    DerefCompoundAssignment {
3972        /// Target expression (e.g., the dereferenced pointer or field access)
3973        target: Expression,
3974        /// Binary operator to apply
3975        op: BinaryOperator,
3976        /// Value expression
3977        value: Expression,
3978    },
3979    /// Function call statement: `strlen(s);`, `strcpy(dst, src);`
3980    FunctionCall {
3981        /// Function name
3982        function: String,
3983        /// Arguments
3984        arguments: Vec<Expression>,
3985    },
3986}
3987
3988impl Statement {
3989    /// Check if this statement is a string function call.
3990    pub fn is_string_function_call(&self) -> bool {
3991        match self {
3992            Statement::FunctionCall { function, .. } => {
3993                matches!(function.as_str(), "strlen" | "strcmp" | "strcpy" | "strdup")
3994            }
3995            _ => false,
3996        }
3997    }
3998
3999    /// Check if this statement is a function call.
4000    pub fn is_function_call(&self) -> bool {
4001        matches!(self, Statement::FunctionCall { .. })
4002    }
4003
4004    /// Convert this statement to a function call expression if it is one.
4005    ///
4006    /// # Implementation Status
4007    ///
4008    /// Stub implementation - always returns `None`.
4009    /// The `Statement::FunctionCall` variant doesn't store the call as an `Expression`,
4010    /// so conversion would require reconstructing an `Expression::FunctionCall` from
4011    /// the statement's fields.
4012    pub fn as_function_call(&self) -> Option<&Expression> {
4013        None
4014    }
4015}
4016
/// Prefix operators that take a single operand in a C expression.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum UnaryOperator {
    /// Arithmetic negation: `-x`
    Minus,
    /// Logical negation: `!x`
    LogicalNot,
    /// Bitwise complement: `~x`
    BitwiseNot,
    /// Address-of: `&x`
    AddressOf,
}
4029
/// Infix operators that take two operands in a C expression.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BinaryOperator {
    /// `+` addition
    Add,
    /// `-` subtraction
    Subtract,
    /// `*` multiplication
    Multiply,
    /// `/` division
    Divide,
    /// `%` modulo
    Modulo,
    /// `==` equality
    Equal,
    /// `!=` inequality
    NotEqual,
    /// `<` less than
    LessThan,
    /// `>` greater than
    GreaterThan,
    /// `<=` less than or equal
    LessEqual,
    /// `>=` greater than or equal
    GreaterEqual,
    /// `&&` logical AND
    LogicalAnd,
    /// `||` logical OR
    LogicalOr,
    /// `<<` left shift
    LeftShift,
    /// `>>` right shift
    RightShift,
    /// `&` bitwise AND
    BitwiseAnd,
    /// `|` bitwise OR
    BitwiseOr,
    /// `^` bitwise XOR
    BitwiseXor,
    /// `=` assignment, used for assignments embedded in expressions like `(c=getchar())`
    Assign,
}
4072
4073/// Represents a C expression.
4074#[derive(Debug, Clone, PartialEq)]
4075pub enum Expression {
4076    /// Integer literal: `42`
4077    IntLiteral(i32),
4078    /// Float literal: `3.14` (stored as string to preserve precision)
4079    FloatLiteral(String),
4080    /// String literal: `"hello"`
4081    StringLiteral(String),
4082    /// Character literal: `'a'`, `'\0'`, `'\n'`
4083    CharLiteral(i8),
4084    /// Variable reference: `x`
4085    Variable(String),
4086    /// Binary operation: `a + b`
4087    BinaryOp {
4088        /// Operator
4089        op: BinaryOperator,
4090        /// Left operand
4091        left: Box<Expression>,
4092        /// Right operand
4093        right: Box<Expression>,
4094    },
4095    /// Function call: `malloc(4)`
4096    FunctionCall {
4097        /// Function name
4098        function: String,
4099        /// Arguments
4100        arguments: Vec<Expression>,
4101    },
4102    /// Pointer dereference: `*ptr`
4103    Dereference(Box<Expression>),
4104    /// Unary operation: `-x`, `!x`
4105    UnaryOp {
4106        /// Operator
4107        op: UnaryOperator,
4108        /// Operand
4109        operand: Box<Expression>,
4110    },
4111    /// Array indexing: `arr[i]`
4112    ArrayIndex {
4113        /// Array expression
4114        array: Box<Expression>,
4115        /// Index expression
4116        index: Box<Expression>,
4117    },
4118    /// Struct field access: `obj.field`
4119    FieldAccess {
4120        /// Object expression
4121        object: Box<Expression>,
4122        /// Field name
4123        field: String,
4124    },
4125    /// Pointer field access: `ptr->field`
4126    PointerFieldAccess {
4127        /// Pointer expression
4128        pointer: Box<Expression>,
4129        /// Field name
4130        field: String,
4131    },
4132    /// Post-increment expression: `ptr++`
4133    PostIncrement {
4134        /// Operand expression
4135        operand: Box<Expression>,
4136    },
4137    /// Pre-increment expression: `++ptr`
4138    PreIncrement {
4139        /// Operand expression
4140        operand: Box<Expression>,
4141    },
4142    /// Post-decrement expression: `ptr--`
4143    PostDecrement {
4144        /// Operand expression
4145        operand: Box<Expression>,
4146    },
4147    /// Pre-decrement expression: `--ptr`
4148    PreDecrement {
4149        /// Operand expression
4150        operand: Box<Expression>,
4151    },
4152    /// Sizeof expression: `sizeof(int)` or `sizeof(struct Data)`
4153    Sizeof {
4154        /// Type name as a string (e.g., "int", "struct Data")
4155        type_name: String,
4156    },
4157    /// Cast expression: `(int)x` or `(void*)ptr`
4158    ///
4159    /// C-style cast that converts an expression to a target type.
4160    /// Maps to Rust `as` operator for safe casts, or `transmute` for unsafe casts.
4161    ///
4162    /// # Examples
4163    ///
4164    /// ```c
4165    /// int x = (int)3.14;           // float to int
4166    /// void* ptr = (void*)buffer;   // pointer cast
4167    /// long l = (long)small_int;    // widening cast
4168    /// ```
4169    Cast {
4170        /// Target type to cast to
4171        target_type: Type,
4172        /// Expression being cast
4173        expr: Box<Expression>,
4174    },
4175    /// Compound literal: `(struct Point){10, 20}` or `(int[]){1, 2, 3}`
4176    ///
4177    /// C99 compound literals create anonymous objects of a specified type.
4178    /// Useful for passing struct values to functions or creating temporary objects.
4179    ///
4180    /// # Examples
4181    ///
4182    /// ```c
4183    /// struct Point p = (struct Point){10, 20};       // struct compound literal
4184    /// int* arr = (int[]){1, 2, 3, 4, 5};             // array compound literal
4185    /// draw((struct Rect){.x=0, .y=0, .w=100, .h=50}); // with designated initializers
4186    /// ```
4187    CompoundLiteral {
4188        /// Type of the compound literal (struct Point, int[], etc.)
4189        literal_type: Type,
4190        /// Initializer expressions (values for struct fields or array elements)
4191        initializers: Vec<Expression>,
4192    },
4193    /// Ternary/Conditional expression: `cond ? then_val : else_val`
4194    ///
4195    /// The C ternary operator evaluates the condition and returns either
4196    /// the then_val or else_val based on whether condition is truthy.
4197    ///
4198    /// # DECY-192
4199    ///
4200    /// Added to support K&R Chapter 2.11 Conditional Expressions.
4201    ///
4202    /// # Examples
4203    ///
4204    /// ```c
4205    /// int max = (a > b) ? a : b;
4206    /// char* msg = (x == 0) ? "zero" : "nonzero";
4207    /// ```
4208    Ternary {
4209        /// Condition expression (evaluated as boolean)
4210        condition: Box<Expression>,
4211        /// Value if condition is true
4212        then_expr: Box<Expression>,
4213        /// Value if condition is false
4214        else_expr: Box<Expression>,
4215    },
4216}
4217
4218impl Expression {
4219    /// Check if this expression is a string function call (strlen, strcmp, strcpy, strdup).
4220    pub fn is_string_function_call(&self) -> bool {
4221        match self {
4222            Expression::FunctionCall { function, .. } => {
4223                matches!(function.as_str(), "strlen" | "strcmp" | "strcpy" | "strdup")
4224            }
4225            _ => false,
4226        }
4227    }
4228
4229    /// Get the string function name if this is a string function call.
4230    pub fn string_function_name(&self) -> Option<&str> {
4231        match self {
4232            Expression::FunctionCall { function, .. } if self.is_string_function_call() => {
4233                Some(function.as_str())
4234            }
4235            _ => None,
4236        }
4237    }
4238
4239    /// Check if this expression has a string literal argument.
4240    pub fn has_string_literal_argument(&self) -> bool {
4241        match self {
4242            Expression::FunctionCall { arguments, .. } => arguments
4243                .iter()
4244                .any(|arg| matches!(arg, Expression::StringLiteral(_))),
4245            _ => false,
4246        }
4247    }
4248}
4249
4250/// Represents a C typedef declaration.
4251#[derive(Debug, Clone, PartialEq)]
4252pub struct Typedef {
4253    /// Typedef name (the alias)
4254    pub name: String,
4255    /// Underlying type being aliased
4256    pub underlying_type: Type,
4257}
4258
4259impl Typedef {
4260    /// Create a new typedef.
4261    pub fn new(name: String, underlying_type: Type) -> Self {
4262        Self {
4263            name,
4264            underlying_type,
4265        }
4266    }
4267
4268    /// Get the typedef name.
4269    pub fn name(&self) -> &str {
4270        &self.name
4271    }
4272
4273    /// Get the underlying type as a string representation.
4274    pub fn underlying_type(&self) -> &str {
4275        // Return a string representation of the type
4276        match &self.underlying_type {
4277            Type::Void => "void",
4278            Type::Int => "int",
4279            Type::UnsignedInt => "unsigned int", // DECY-158
4280            Type::Float => "float",
4281            Type::Double => "double",
4282            Type::Char => "char",
4283            Type::Pointer(inner) => match **inner {
4284                Type::Char => "char*",
4285                Type::Int => "int*",
4286                Type::UnsignedInt => "unsigned int*", // DECY-158
4287                Type::Float => "float*",
4288                Type::Double => "double*",
4289                Type::Void => "void*",
4290                _ => "pointer",
4291            },
4292            Type::Struct(name) => name,
4293            Type::FunctionPointer { .. } => "function pointer",
4294            Type::Array { .. } => "array",
4295            // DECY-172: TypeAlias returns the alias name
4296            Type::TypeAlias(name) => name,
4297        }
4298    }
4299
4300    /// Check if this typedef is a pointer type.
4301    pub fn is_pointer(&self) -> bool {
4302        matches!(self.underlying_type, Type::Pointer(_))
4303    }
4304
4305    /// Check if this typedef is a struct type.
4306    pub fn is_struct(&self) -> bool {
4307        matches!(self.underlying_type, Type::Struct(_))
4308    }
4309
4310    /// Check if this typedef is a function pointer type.
4311    pub fn is_function_pointer(&self) -> bool {
4312        matches!(self.underlying_type, Type::FunctionPointer { .. })
4313    }
4314
4315    /// Check if this typedef is an array type.
4316    pub fn is_array(&self) -> bool {
4317        // Arrays are not yet in the Type enum, so return false for now
4318        false
4319    }
4320}
4321
4322/// Represents a struct field.
4323#[derive(Debug, Clone, PartialEq)]
4324pub struct StructField {
4325    /// Field name
4326    pub name: String,
4327    /// Field type
4328    pub field_type: Type,
4329}
4330
4331impl StructField {
4332    /// Create a new struct field.
4333    pub fn new(name: String, field_type: Type) -> Self {
4334        Self { name, field_type }
4335    }
4336
4337    /// Get the field name.
4338    pub fn name(&self) -> &str {
4339        &self.name
4340    }
4341
4342    /// Check if this field is a function pointer.
4343    pub fn is_function_pointer(&self) -> bool {
4344        matches!(self.field_type, Type::FunctionPointer { .. })
4345    }
4346}
4347
4348/// Represents a struct definition.
4349#[derive(Debug, Clone, PartialEq)]
4350pub struct Struct {
4351    /// Struct name
4352    pub name: String,
4353    /// Struct fields
4354    pub fields: Vec<StructField>,
4355}
4356
4357impl Struct {
4358    /// Create a new struct.
4359    pub fn new(name: String, fields: Vec<StructField>) -> Self {
4360        Self { name, fields }
4361    }
4362
4363    /// Get the struct name.
4364    pub fn name(&self) -> &str {
4365        &self.name
4366    }
4367
4368    /// Get the struct fields.
4369    pub fn fields(&self) -> &[StructField] {
4370        &self.fields
4371    }
4372}
4373
4374/// Represents a variable declaration.
4375#[derive(Debug, Clone, PartialEq)]
4376pub struct Variable {
4377    /// Variable name
4378    name: String,
4379    /// Variable type
4380    var_type: Type,
4381    /// Optional initializer expression
4382    initializer: Option<Expression>,
4383    /// Static storage class (file-local)
4384    is_static: bool,
4385    /// Extern storage class (external linkage)
4386    is_extern: bool,
4387    /// Const qualifier (immutable)
4388    is_const: bool,
4389}
4390
4391impl Variable {
4392    /// Create a new variable.
4393    pub fn new(name: String, var_type: Type) -> Self {
4394        Self {
4395            name,
4396            var_type,
4397            initializer: None,
4398            is_static: false,
4399            is_extern: false,
4400            is_const: false,
4401        }
4402    }
4403
4404    /// Create a new variable with an initializer.
4405    pub fn new_with_initializer(name: String, var_type: Type, initializer: Expression) -> Self {
4406        Self {
4407            name,
4408            var_type,
4409            initializer: Some(initializer),
4410            is_static: false,
4411            is_extern: false,
4412            is_const: false,
4413        }
4414    }
4415
4416    /// Create a new variable with storage class specifiers.
4417    pub fn new_with_storage_class(
4418        name: String,
4419        var_type: Type,
4420        initializer: Option<Expression>,
4421        is_static: bool,
4422        is_extern: bool,
4423        is_const: bool,
4424    ) -> Self {
4425        Self {
4426            name,
4427            var_type,
4428            initializer,
4429            is_static,
4430            is_extern,
4431            is_const,
4432        }
4433    }
4434
4435    /// Get the variable name.
4436    pub fn name(&self) -> &str {
4437        &self.name
4438    }
4439
4440    /// Get the variable type.
4441    pub fn var_type(&self) -> &Type {
4442        &self.var_type
4443    }
4444
4445    /// Check if this variable is a function pointer.
4446    pub fn is_function_pointer(&self) -> bool {
4447        matches!(self.var_type, Type::FunctionPointer { .. })
4448    }
4449
4450    /// Get the number of parameters if this is a function pointer.
4451    pub fn function_pointer_param_count(&self) -> usize {
4452        match &self.var_type {
4453            Type::FunctionPointer { param_types, .. } => param_types.len(),
4454            _ => 0,
4455        }
4456    }
4457
4458    /// Check if this function pointer has a void return type.
4459    pub fn function_pointer_has_void_return(&self) -> bool {
4460        match &self.var_type {
4461            Type::FunctionPointer { return_type, .. } => matches!(**return_type, Type::Void),
4462            _ => false,
4463        }
4464    }
4465
4466    /// Check if this variable is a string literal (char* with literal initializer).
4467    ///
4468    /// Detects patterns like: `const char* msg = "Hello";`
4469    ///
4470    /// # Implementation
4471    ///
4472    /// Checks if:
4473    /// - Type is a pointer to char (`char*`)
4474    /// - Has an initializer that is a `StringLiteral` expression
4475    ///
4476    /// Note: Const qualifier detection not yet implemented - checks all char* pointers.
4477    pub fn is_string_literal(&self) -> bool {
4478        // Check if type is char*
4479        let is_char_ptr =
4480            matches!(self.var_type, Type::Pointer(ref inner) if **inner == Type::Char);
4481
4482        // Check if initializer is a string literal
4483        if let Some(initializer) = &self.initializer {
4484            is_char_ptr && matches!(initializer, Expression::StringLiteral(_))
4485        } else {
4486            false
4487        }
4488    }
4489
4490    /// Check if this variable is a string buffer (char* allocated with malloc).
4491    ///
4492    /// Detects patterns like: `char* buffer = malloc(100);`
4493    ///
4494    /// # Implementation
4495    ///
4496    /// Checks if:
4497    /// - Type is a pointer to char (`char*`)
4498    /// - Has an initializer that is a malloc/calloc function call
4499    pub fn is_string_buffer(&self) -> bool {
4500        // Check if type is char*
4501        let is_char_ptr =
4502            matches!(self.var_type, Type::Pointer(ref inner) if **inner == Type::Char);
4503
4504        // Check if initializer is malloc/calloc call
4505        if let Some(Expression::FunctionCall { function, .. }) = &self.initializer {
4506            is_char_ptr && (function == "malloc" || function == "calloc")
4507        } else {
4508            false
4509        }
4510    }
4511
4512    /// Get the initializer expression for this variable.
4513    ///
4514    /// Returns `Some(&Expression)` if the variable has an initializer, `None` otherwise.
4515    pub fn initializer(&self) -> Option<&Expression> {
4516        self.initializer.as_ref()
4517    }
4518
4519    /// Check if this variable has static storage class (file-local).
4520    pub fn is_static(&self) -> bool {
4521        self.is_static
4522    }
4523
4524    /// Check if this variable is extern (external linkage).
4525    pub fn is_extern(&self) -> bool {
4526        self.is_extern
4527    }
4528
4529    /// Check if this variable is const (immutable).
4530    pub fn is_const(&self) -> bool {
4531        self.is_const
4532    }
4533}
4534
4535/// Abstract Syntax Tree representing parsed C code.
4536#[derive(Debug, Clone, PartialEq)]
4537pub struct Ast {
4538    functions: Vec<Function>,
4539    typedefs: Vec<Typedef>,
4540    structs: Vec<Struct>,
4541    macros: Vec<MacroDefinition>,
4542    variables: Vec<Variable>,
4543}
4544
/// A C macro definition (`#define`).
///
/// Two forms of macro are distinguished:
/// - **Object-like**: simple text replacement (e.g., `#define MAX 100`)
/// - **Function-like**: parameterized text replacement (e.g., `#define SQR(x) ((x) * (x))`)
///
/// The form is decided solely by whether `parameters` is empty, so a macro
/// stored with an empty parameter list is always reported as object-like.
///
/// # Examples
///
/// ```no_run
/// use decy_parser::parser::{CParser, MacroDefinition};
///
/// // Parse a simple object-like macro
/// let parser = CParser::new()?;
/// let ast = parser.parse("#define MAX 100\nint main() { return 0; }")?;
/// assert_eq!(ast.macros().len(), 1);
/// assert_eq!(ast.macros()[0].name(), "MAX");
/// assert!(ast.macros()[0].is_object_like());
///
/// // Parse a function-like macro
/// let ast2 = parser.parse("#define SQR(x) ((x) * (x))\nint main() { return 0; }")?;
/// assert_eq!(ast2.macros()[0].name(), "SQR");
/// assert!(ast2.macros()[0].is_function_like());
/// assert_eq!(ast2.macros()[0].parameters(), &["x"]);
/// # Ok::<(), anyhow::Error>(())
/// ```
///
/// # Reference
///
/// K&R §4.11, ISO C99 §6.10.3
#[derive(Debug, Clone, PartialEq)]
pub struct MacroDefinition {
    /// Macro name
    pub name: String,
    /// Parameters (empty for object-like macros)
    pub parameters: Vec<String>,
    /// Macro body (unparsed, tokenized without spaces)
    pub body: String,
}

impl MacroDefinition {
    /// Builds an object-like macro, i.e. one with an empty parameter list.
    pub fn new_object_like(name: String, body: String) -> Self {
        Self::new_function_like(name, Vec::new(), body)
    }

    /// Builds a macro with the given parameter list.
    pub fn new_function_like(name: String, parameters: Vec<String>, body: String) -> Self {
        MacroDefinition {
            name,
            parameters,
            body,
        }
    }

    /// Borrows the macro name.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Borrows the macro parameters as a slice.
    pub fn parameters(&self) -> &[String] {
        self.parameters.as_slice()
    }

    /// Borrows the macro body.
    pub fn body(&self) -> &str {
        self.body.as_str()
    }

    /// Returns `true` when the macro has at least one parameter.
    pub fn is_function_like(&self) -> bool {
        !self.is_object_like()
    }

    /// Returns `true` when the macro has no parameters.
    pub fn is_object_like(&self) -> bool {
        self.parameters.is_empty()
    }
}
4628
4629impl Ast {
4630    /// Create a new empty AST.
4631    pub fn new() -> Self {
4632        Self {
4633            functions: Vec::new(),
4634            typedefs: Vec::new(),
4635            structs: Vec::new(),
4636            macros: Vec::new(),
4637            variables: Vec::new(),
4638        }
4639    }
4640
4641    /// Get the functions in the AST.
4642    pub fn functions(&self) -> &[Function] {
4643        &self.functions
4644    }
4645
4646    /// Add a function to the AST.
4647    pub fn add_function(&mut self, function: Function) {
4648        self.functions.push(function);
4649    }
4650
4651    /// Get the typedefs in the AST.
4652    pub fn typedefs(&self) -> &[Typedef] {
4653        &self.typedefs
4654    }
4655
4656    /// Add a typedef to the AST.
4657    pub fn add_typedef(&mut self, typedef: Typedef) {
4658        self.typedefs.push(typedef);
4659    }
4660
4661    /// Get the structs in the AST.
4662    pub fn structs(&self) -> &[Struct] {
4663        &self.structs
4664    }
4665
4666    /// Add a struct to the AST.
4667    /// Deduplicates by name to avoid duplicate definitions from system includes.
4668    pub fn add_struct(&mut self, struct_def: Struct) {
4669        // Deduplicate: don't add if a struct with the same name already exists
4670        if !self.structs.iter().any(|s| s.name() == struct_def.name()) {
4671            self.structs.push(struct_def);
4672        }
4673    }
4674
4675    /// Get the macro definitions in the AST.
4676    pub fn macros(&self) -> &[MacroDefinition] {
4677        &self.macros
4678    }
4679
4680    /// Add a macro definition to the AST.
4681    pub fn add_macro(&mut self, macro_def: MacroDefinition) {
4682        self.macros.push(macro_def);
4683    }
4684
4685    /// Get the variables in the AST.
4686    pub fn variables(&self) -> &[Variable] {
4687        &self.variables
4688    }
4689
4690    /// Add a variable to the AST.
4691    pub fn add_variable(&mut self, variable: Variable) {
4692        self.variables.push(variable);
4693    }
4694}
4695
4696impl Default for Ast {
4697    fn default() -> Self {
4698        Self::new()
4699    }
4700}
4701
4702/// Represents a C function.
4703#[derive(Debug, Clone, PartialEq)]
4704pub struct Function {
4705    /// Function name
4706    pub name: String,
4707    /// Return type
4708    pub return_type: Type,
4709    /// Parameters
4710    pub parameters: Vec<Parameter>,
4711    /// Function body (statements)
4712    pub body: Vec<Statement>,
4713}
4714
4715impl Function {
4716    /// Create a new function.
4717    pub fn new(name: String, return_type: Type, parameters: Vec<Parameter>) -> Self {
4718        Self {
4719            name,
4720            return_type,
4721            parameters,
4722            body: Vec::new(),
4723        }
4724    }
4725
4726    /// Create a new function with body.
4727    pub fn new_with_body(
4728        name: String,
4729        return_type: Type,
4730        parameters: Vec<Parameter>,
4731        body: Vec<Statement>,
4732    ) -> Self {
4733        Self {
4734            name,
4735            return_type,
4736            parameters,
4737            body,
4738        }
4739    }
4740}
4741
/// Represents a C type.
///
/// Besides `Debug`, `Clone` and `PartialEq`, the enum derives `Eq` and `Hash`
/// so that types can be used as `HashMap` keys and `HashSet` elements. Every
/// payload (`String`, `Box<Type>`, `Vec<Type>`, `Option<i64>`) supports both.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[allow(clippy::enum_variant_names)] // TypeAlias is a meaningful variant name
pub enum Type {
    /// void
    Void,
    /// int
    Int,
    /// unsigned int (DECY-158)
    UnsignedInt,
    /// float
    Float,
    /// double
    Double,
    /// char
    Char,
    /// Pointer to a type
    Pointer(Box<Type>),
    /// Struct type (e.g., struct Point)
    Struct(String),
    /// Function pointer type (e.g., int (*callback)(int))
    FunctionPointer {
        /// Parameter types
        param_types: Vec<Type>,
        /// Return type
        return_type: Box<Type>,
    },
    /// Array type (e.g., `int arr[10]`)
    /// For typedef assertions like: `typedef char check[sizeof(int) == 4 ? 1 : -1]`
    Array {
        /// Element type
        element_type: Box<Type>,
        /// Array size (None for unknown/expression-based size)
        size: Option<i64>,
    },
    /// Type alias (typedef) - preserves the alias name
    /// DECY-172: Used for size_t, ssize_t, ptrdiff_t, etc.
    TypeAlias(String),
}
4781
4782/// Represents a function parameter.
4783#[derive(Debug, Clone, PartialEq)]
4784pub struct Parameter {
4785    /// Parameter name
4786    pub name: String,
4787    /// Parameter type
4788    pub param_type: Type,
4789    /// Whether the pointee type is const (for pointer params like `const char*`)
4790    /// DECY-135: Track const qualifier to enable const char* → &str transformation
4791    pub is_pointee_const: bool,
4792}
4793
4794impl Parameter {
4795    /// Create a new parameter.
4796    pub fn new(name: String, param_type: Type) -> Self {
4797        Self {
4798            name,
4799            param_type,
4800            is_pointee_const: false,
4801        }
4802    }
4803
4804    /// Create a new parameter with const pointee information.
4805    /// DECY-135: Used for const char* parameters
4806    pub fn new_with_const(name: String, param_type: Type, is_pointee_const: bool) -> Self {
4807        Self {
4808            name,
4809            param_type,
4810            is_pointee_const,
4811        }
4812    }
4813
4814    /// Check if this parameter is a function pointer.
4815    pub fn is_function_pointer(&self) -> bool {
4816        matches!(self.param_type, Type::FunctionPointer { .. })
4817    }
4818
4819    /// Check if this parameter is a const char pointer (const char*).
4820    ///
4821    /// DECY-135: Now properly checks if pointee is const-qualified.
4822    /// Returns `true` only for `const char*` parameters, not `char*`.
4823    pub fn is_const_char_pointer(&self) -> bool {
4824        self.is_pointee_const
4825            && matches!(self.param_type, Type::Pointer(ref inner) if matches!(**inner, Type::Char))
4826    }
4827
4828    /// Check if this parameter is any char pointer (char* or const char*).
4829    pub fn is_char_pointer(&self) -> bool {
4830        matches!(self.param_type, Type::Pointer(ref inner) if matches!(**inner, Type::Char))
4831    }
4832}
4833
4834#[cfg(test)]
4835#[path = "parser_tests.rs"]
4836mod parser_tests;
4837
4838#[cfg(test)]
4839#[path = "pointer_arithmetic_tests.rs"]
4840mod pointer_arithmetic_tests;
4841
4842#[cfg(test)]
4843#[path = "break_continue_tests.rs"]
4844mod break_continue_tests;