decy_parser/
parser.rs

1//! C parser implementation using clang-sys.
2//!
3//! This module provides the core parsing functionality to convert C source code
4//! into an AST representation using LLVM/Clang bindings.
5
6use anyhow::{Context, Result};
7use clang_sys::*;
8use std::ffi::{CStr, CString};
9use std::path::Path;
10use std::ptr;
11
/// C parser using clang-sys.
///
/// Owns a libclang index handle that is created in [`CParser::new`] and
/// released when the parser is dropped.
///
/// # Examples
///
/// ```no_run
/// use decy_parser::parser::CParser;
///
/// let parser = CParser::new()?;
/// let ast = parser.parse("int main() { return 0; }")?;
/// assert_eq!(ast.functions().len(), 1);
/// # Ok::<(), anyhow::Error>(())
/// ```
#[derive(Debug)]
pub struct CParser {
    // Handle returned by `clang_createIndex`; disposed in the `Drop` impl.
    index: CXIndex,
}
28
29impl CParser {
30    /// Create a new C parser.
31    ///
32    /// # Examples
33    ///
34    /// ```no_run
35    /// use decy_parser::parser::CParser;
36    ///
37    /// let parser = CParser::new()?;
38    /// # Ok::<(), anyhow::Error>(())
39    /// ```
40    pub fn new() -> Result<Self> {
41        // SAFETY: clang_createIndex is safe to call with these parameters
42        let index = unsafe { clang_createIndex(0, 0) };
43        if index.is_null() {
44            anyhow::bail!("Failed to create clang index");
45        }
46        Ok(Self { index })
47    }
48
49    /// Parse C source code into an AST.
50    ///
51    /// # Arguments
52    ///
53    /// * `source` - The C source code to parse
54    ///
55    /// # Returns
56    ///
57    /// * `Ok(Ast)` - The parsed AST
58    /// * `Err(anyhow::Error)` - If parsing fails
59    ///
60    /// # Examples
61    ///
62    /// ```no_run
63    /// use decy_parser::parser::CParser;
64    ///
65    /// let parser = CParser::new()?;
66    /// let ast = parser.parse("int add(int a, int b) { return a + b; }")?;
67    /// # Ok::<(), anyhow::Error>(())
68    /// ```
69    pub fn parse(&self, source: &str) -> Result<Ast> {
70        let filename = CString::new("input.c").context("Failed to create filename")?;
71        let source_cstr = CString::new(source).context("Failed to convert source to CString")?;
72
73        let mut ast = Ast::new();
74
75        // Handle empty input
76        if source.trim().is_empty() {
77            return Ok(ast);
78        }
79
80        // SAFETY: Creating unsaved file with valid C strings
81        let unsaved_file = CXUnsavedFile {
82            Filename: filename.as_ptr(),
83            Contents: source_cstr.as_ptr(),
84            Length: source.len() as std::os::raw::c_ulong,
85        };
86
87        // SAFETY: Parsing with clang_parseTranslationUnit2
88        // Use CXTranslationUnit_DetailedPreprocessingRecord to capture macro definitions
89        let mut tu = ptr::null_mut();
90        let result = unsafe {
91            clang_parseTranslationUnit2(
92                self.index,
93                filename.as_ptr(),
94                ptr::null(),
95                0,
96                &unsaved_file as *const CXUnsavedFile as *mut CXUnsavedFile,
97                1,
98                CXTranslationUnit_DetailedPreprocessingRecord,
99                &mut tu,
100            )
101        };
102
103        if result != CXError_Success || tu.is_null() {
104            anyhow::bail!("Failed to parse C source");
105        }
106
107        // SAFETY: Check for diagnostics (errors/warnings)
108        let num_diagnostics = unsafe { clang_getNumDiagnostics(tu) };
109        for i in 0..num_diagnostics {
110            let diag = unsafe { clang_getDiagnostic(tu, i) };
111            let severity = unsafe { clang_getDiagnosticSeverity(diag) };
112
113            // If we have errors, fail the parse
114            if severity >= CXDiagnostic_Error {
115                unsafe { clang_disposeDiagnostic(diag) };
116                unsafe { clang_disposeTranslationUnit(tu) };
117                anyhow::bail!("C source has syntax errors");
118            }
119
120            unsafe { clang_disposeDiagnostic(diag) };
121        }
122
123        // SAFETY: Getting cursor from valid translation unit
124        let cursor = unsafe { clang_getTranslationUnitCursor(tu) };
125
126        // Visit children to extract functions
127        let ast_ptr = &mut ast as *mut Ast;
128
129        // SAFETY: Visiting cursor children with callback
130        unsafe {
131            clang_visitChildren(cursor, visit_function, ast_ptr as CXClientData);
132
133            // Clean up
134            clang_disposeTranslationUnit(tu);
135        }
136
137        Ok(ast)
138    }
139
140    /// Parse a C file into an AST.
141    ///
142    /// # Arguments
143    ///
144    /// * `path` - Path to the C file
145    ///
146    /// # Returns
147    ///
148    /// * `Ok(Ast)` - The parsed AST
149    /// * `Err(anyhow::Error)` - If parsing fails
150    pub fn parse_file(&self, _path: &Path) -> Result<Ast> {
151        // RED phase: not yet implemented
152        Err(anyhow::anyhow!("Not implemented yet"))
153    }
154}
155
156impl Drop for CParser {
157    fn drop(&mut self) {
158        // SAFETY: Disposing of valid clang index
159        unsafe {
160            clang_disposeIndex(self.index);
161        }
162    }
163}
164
165/// Visitor callback for clang AST traversal.
166///
167/// # Safety
168///
169/// This function is called by clang_visitChildren and must follow C calling conventions.
170extern "C" fn visit_function(
171    cursor: CXCursor,
172    _parent: CXCursor,
173    client_data: CXClientData,
174) -> CXChildVisitResult {
175    // SAFETY: Converting client data back to AST pointer
176    let ast = unsafe { &mut *(client_data as *mut Ast) };
177
178    // SAFETY: Getting cursor kind
179    let kind = unsafe { clang_getCursorKind(cursor) };
180
181    if kind == CXCursor_FunctionDecl {
182        // Extract function information
183        if let Some(function) = extract_function(cursor) {
184            ast.add_function(function);
185        }
186    } else if kind == CXCursor_TypedefDecl {
187        // Extract typedef information
188        if let Some(typedef) = extract_typedef(cursor) {
189            ast.add_typedef(typedef);
190        }
191    } else if kind == CXCursor_StructDecl {
192        // Extract struct information
193        if let Some(struct_def) = extract_struct(cursor) {
194            ast.add_struct(struct_def);
195        }
196    } else if kind == CXCursor_VarDecl {
197        // Extract variable declaration
198        if let Some(variable) = extract_variable(cursor) {
199            ast.add_variable(variable);
200        }
201    } else if kind == CXCursor_MacroDefinition {
202        // Extract macro definition (only from main file, not includes)
203        let location = unsafe { clang_getCursorLocation(cursor) };
204        let mut file: CXFile = ptr::null_mut();
205        unsafe {
206            clang_getFileLocation(
207                location,
208                &mut file,
209                ptr::null_mut(),
210                ptr::null_mut(),
211                ptr::null_mut(),
212            );
213        }
214
215        // Only process macros from the main file (not system headers)
216        if !file.is_null() {
217            let file_name = unsafe {
218                let name_cxstring = clang_getFileName(file);
219                let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
220                let name = c_str.to_string_lossy().into_owned();
221                clang_disposeString(name_cxstring);
222                name
223            };
224
225            // Only add macros from input.c (our source file)
226            if file_name.ends_with("input.c") {
227                if let Some(macro_def) = extract_macro(cursor) {
228                    ast.add_macro(macro_def);
229                }
230            }
231        }
232    }
233
234    CXChildVisit_Continue
235}
236
237/// Extract function information from a clang cursor.
238fn extract_function(cursor: CXCursor) -> Option<Function> {
239    // SAFETY: Getting cursor spelling (function name)
240    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
241    let name = unsafe {
242        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
243        let name = c_str.to_string_lossy().into_owned();
244        clang_disposeString(name_cxstring);
245        name
246    };
247
248    // SAFETY: Getting return type
249    let cx_type = unsafe { clang_getCursorType(cursor) };
250    let return_cx_type = unsafe { clang_getResultType(cx_type) };
251    let return_type = convert_type(return_cx_type)?;
252
253    // Extract parameters
254    let num_args = unsafe { clang_Cursor_getNumArguments(cursor) };
255    let mut parameters = Vec::new();
256
257    for i in 0..num_args {
258        // SAFETY: Getting argument cursor
259        let arg_cursor = unsafe { clang_Cursor_getArgument(cursor, i as u32) };
260
261        // Get parameter name
262        let param_name_cxstring = unsafe { clang_getCursorSpelling(arg_cursor) };
263        let param_name = unsafe {
264            let c_str = CStr::from_ptr(clang_getCString(param_name_cxstring));
265            let name = c_str.to_string_lossy().into_owned();
266            clang_disposeString(param_name_cxstring);
267            name
268        };
269
270        // Get parameter type
271        let param_cx_type = unsafe { clang_getCursorType(arg_cursor) };
272        if let Some(param_type) = convert_type(param_cx_type) {
273            parameters.push(Parameter::new(param_name, param_type));
274        }
275    }
276
277    // Extract function body by visiting children
278    let mut body = Vec::new();
279    let body_ptr = &mut body as *mut Vec<Statement>;
280
281    unsafe {
282        clang_visitChildren(cursor, visit_statement, body_ptr as CXClientData);
283    }
284
285    Some(Function::new_with_body(name, return_type, parameters, body))
286}
287
288/// Extract typedef information from a clang cursor.
289fn extract_typedef(cursor: CXCursor) -> Option<Typedef> {
290    // SAFETY: Getting typedef name
291    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
292    let name = unsafe {
293        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
294        let name = c_str.to_string_lossy().into_owned();
295        clang_disposeString(name_cxstring);
296        name
297    };
298
299    // SAFETY: Getting underlying type of typedef
300    let cx_type = unsafe { clang_getTypedefDeclUnderlyingType(cursor) };
301    let underlying_type = convert_type(cx_type)?;
302
303    Some(Typedef::new(name, underlying_type))
304}
305
306/// Extract struct information from a clang cursor.
307fn extract_struct(cursor: CXCursor) -> Option<Struct> {
308    // SAFETY: Getting struct name
309    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
310    let name = unsafe {
311        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
312        let name = c_str.to_string_lossy().into_owned();
313        clang_disposeString(name_cxstring);
314        name
315    };
316
317    // Skip anonymous structs
318    if name.is_empty() {
319        return None;
320    }
321
322    // Extract struct fields by visiting children
323    let mut fields = Vec::new();
324    let fields_ptr = &mut fields as *mut Vec<StructField>;
325
326    unsafe {
327        clang_visitChildren(cursor, visit_struct_fields, fields_ptr as CXClientData);
328    }
329
330    Some(Struct::new(name, fields))
331}
332
333/// Extract macro definition from a clang cursor.
334///
335/// Extract variable declaration information from a clang cursor.
336///
337/// Extracts global and local variable declarations, including function pointers.
338///
339/// # Examples
340///
341/// Simple: `int x;`
342/// Function pointer: `int (*callback)(int);`
343fn extract_variable(cursor: CXCursor) -> Option<Variable> {
344    // SAFETY: Getting cursor spelling (variable name)
345    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
346    let name = unsafe {
347        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
348        let name = c_str.to_string_lossy().into_owned();
349        clang_disposeString(name_cxstring);
350        name
351    };
352
353    // SAFETY: Getting variable type
354    let cx_type = unsafe { clang_getCursorType(cursor) };
355    let var_type = convert_type(cx_type)?;
356
357    // Extract initializer by visiting children
358    let mut initializer: Option<Expression> = None;
359    let initializer_ptr = &mut initializer as *mut Option<Expression>;
360
361    unsafe {
362        clang_visitChildren(
363            cursor,
364            visit_variable_initializer,
365            initializer_ptr as CXClientData,
366        );
367    }
368
369    if let Some(init_expr) = initializer {
370        Some(Variable::new_with_initializer(name, var_type, init_expr))
371    } else {
372        Some(Variable::new(name, var_type))
373    }
374}
375
376/// Helper function to extract an expression from a cursor.
377/// Dispatches to the appropriate extract function based on cursor kind.
378#[allow(non_upper_case_globals)]
379fn try_extract_expression(cursor: CXCursor) -> Option<Expression> {
380    let kind = unsafe { clang_getCursorKind(cursor) };
381
382    match kind {
383        CXCursor_IntegerLiteral => extract_int_literal(cursor),
384        CXCursor_StringLiteral => extract_string_literal(cursor),
385        CXCursor_DeclRefExpr => extract_variable_ref(cursor),
386        CXCursor_BinaryOperator => extract_binary_op(cursor),
387        CXCursor_CallExpr => extract_function_call(cursor),
388        CXCursor_UnaryOperator => extract_unary_op(cursor),
389        CXCursor_ArraySubscriptExpr => extract_array_index(cursor),
390        CXCursor_MemberRefExpr => extract_field_access(cursor),
391        CXCursor_UnexposedExpr => {
392            // UnexposedExpr is a wrapper - recurse into children
393            let mut result: Option<Expression> = None;
394            let result_ptr = &mut result as *mut Option<Expression>;
395            unsafe {
396                clang_visitChildren(
397                    cursor,
398                    visit_variable_initializer,
399                    result_ptr as CXClientData,
400                );
401            }
402            result
403        }
404        _ => None,
405    }
406}
407
408/// Visitor callback for variable initializer expressions.
409#[allow(non_upper_case_globals)]
410extern "C" fn visit_variable_initializer(
411    cursor: CXCursor,
412    _parent: CXCursor,
413    client_data: CXClientData,
414) -> CXChildVisitResult {
415    let initializer = unsafe { &mut *(client_data as *mut Option<Expression>) };
416
417    // Extract the first expression found (the initializer)
418    if let Some(expr) = try_extract_expression(cursor) {
419        *initializer = Some(expr);
420        return CXChildVisit_Break;
421    }
422
423    CXChildVisit_Continue
424}
425
426/// This function extracts #define directives, supporting both object-like and function-like macros.
427///
428/// # Examples
429///
430/// Object-like: `#define MAX 100`
431/// Function-like: `#define SQR(x) ((x) * (x))`
432fn extract_macro(cursor: CXCursor) -> Option<MacroDefinition> {
433    // SAFETY: Getting macro name
434    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
435    let name = unsafe {
436        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
437        let name = c_str.to_string_lossy().into_owned();
438        clang_disposeString(name_cxstring);
439        name
440    };
441
442    // Skip empty macro names
443    if name.is_empty() {
444        return None;
445    }
446
447    // Get macro body using clang_Cursor_isMacroFunctionLike and clang token APIs
448    // For now, we'll check if it's function-like and extract tokens
449    let is_function_like = unsafe { clang_sys::clang_Cursor_isMacroFunctionLike(cursor) } != 0;
450
451    // Get the source range and tokens for the macro
452    let range = unsafe { clang_getCursorExtent(cursor) };
453    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
454
455    let mut tokens: *mut CXToken = ptr::null_mut();
456    let mut num_tokens: u32 = 0;
457
458    unsafe {
459        clang_tokenize(tu, range, &mut tokens, &mut num_tokens);
460    }
461
462    // Extract macro body from tokens
463    // Skip the first token (macro name) and extract the rest
464    let mut parameters = Vec::new();
465    let mut body_tokens = Vec::new();
466    let mut in_params = false;
467
468    for i in 0..num_tokens {
469        let token = unsafe { *tokens.offset(i as isize) };
470        let token_kind = unsafe { clang_getTokenKind(token) };
471        let token_spelling = unsafe { clang_getTokenSpelling(tu, token) };
472        let token_str = unsafe {
473            let c_str = CStr::from_ptr(clang_getCString(token_spelling));
474            let s = c_str.to_string_lossy().into_owned();
475            clang_disposeString(token_spelling);
476            s
477        };
478
479        // Skip the macro name (first token)
480        if i == 0 {
481            continue;
482        }
483
484        // Check for parameter list (function-like macros)
485        if is_function_like && i == 1 && token_str == "(" {
486            in_params = true;
487            continue;
488        }
489
490        if in_params {
491            if token_str == ")" {
492                in_params = false;
493                continue;
494            } else if token_str != ","
495                && (token_kind == CXToken_Identifier || token_kind == CXToken_Keyword)
496            {
497                // Accept both identifiers and keywords as parameter names
498                // C allows keywords in macro parameter names since they're in macro scope
499                parameters.push(token_str);
500            }
501        } else {
502            body_tokens.push(token_str);
503        }
504    }
505
506    // Clean up tokens
507    unsafe {
508        clang_disposeTokens(tu, tokens, num_tokens);
509    }
510
511    // Join body tokens without spaces (preserving original formatting)
512    let body = body_tokens.join("");
513
514    if is_function_like {
515        Some(MacroDefinition::new_function_like(name, parameters, body))
516    } else {
517        Some(MacroDefinition::new_object_like(name, body))
518    }
519}
520
521/// Visitor callback for struct fields.
522///
523/// # Safety
524///
525/// This function is called by clang_visitChildren and must follow C calling conventions.
526#[allow(non_upper_case_globals)]
527extern "C" fn visit_struct_fields(
528    cursor: CXCursor,
529    _parent: CXCursor,
530    client_data: CXClientData,
531) -> CXChildVisitResult {
532    // SAFETY: Converting client data back to fields vector pointer
533    let fields = unsafe { &mut *(client_data as *mut Vec<StructField>) };
534
535    // SAFETY: Getting cursor kind
536    let kind = unsafe { clang_getCursorKind(cursor) };
537
538    if kind == CXCursor_FieldDecl {
539        // Get field name
540        let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
541        let name = unsafe {
542            let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
543            let name = c_str.to_string_lossy().into_owned();
544            clang_disposeString(name_cxstring);
545            name
546        };
547
548        // Get field type
549        let cx_type = unsafe { clang_getCursorType(cursor) };
550        if let Some(field_type) = convert_type(cx_type) {
551            fields.push(StructField::new(name, field_type));
552        }
553    }
554
555    CXChildVisit_Continue
556}
557
558/// Visitor callback for extracting statements from function body.
559///
560/// # Safety
561///
562/// This function is called by clang_visitChildren and must follow C calling conventions.
563#[allow(non_upper_case_globals)]
564extern "C" fn visit_statement(
565    cursor: CXCursor,
566    _parent: CXCursor,
567    client_data: CXClientData,
568) -> CXChildVisitResult {
569    // SAFETY: Converting client data back to statement vector pointer
570    let statements = unsafe { &mut *(client_data as *mut Vec<Statement>) };
571
572    // SAFETY: Getting cursor kind
573    let kind = unsafe { clang_getCursorKind(cursor) };
574
575    match kind {
576        CXCursor_CompoundStmt => {
577            // Compound statement (function body) - recurse into it
578            CXChildVisit_Recurse
579        }
580        CXCursor_DeclStmt => {
581            // Declaration statement - visit its children to get the actual declaration
582            CXChildVisit_Recurse
583        }
584        CXCursor_VarDecl => {
585            // Variable declaration
586            if let Some(stmt) = extract_var_decl(cursor) {
587                statements.push(stmt);
588            }
589            CXChildVisit_Continue
590        }
591        CXCursor_ReturnStmt => {
592            // Return statement
593            if let Some(stmt) = extract_return_stmt(cursor) {
594                statements.push(stmt);
595            }
596            CXChildVisit_Continue
597        }
598        CXCursor_BinaryOperator => {
599            // Could be an assignment statement (x = 42)
600            if let Some(stmt) = extract_assignment_stmt(cursor) {
601                statements.push(stmt);
602            }
603            CXChildVisit_Continue
604        }
605        CXCursor_IfStmt => {
606            // If statement
607            if let Some(stmt) = extract_if_stmt(cursor) {
608                statements.push(stmt);
609            }
610            CXChildVisit_Continue
611        }
612        CXCursor_ForStmt => {
613            // For loop
614            if let Some(stmt) = extract_for_stmt(cursor) {
615                statements.push(stmt);
616            }
617            CXChildVisit_Continue
618        }
619        CXCursor_WhileStmt => {
620            // While loop
621            if let Some(stmt) = extract_while_stmt(cursor) {
622                statements.push(stmt);
623            }
624            CXChildVisit_Continue
625        }
626        CXCursor_SwitchStmt => {
627            // Switch statement
628            if let Some(stmt) = extract_switch_stmt(cursor) {
629                statements.push(stmt);
630            }
631            CXChildVisit_Continue
632        }
633        CXCursor_BreakStmt => {
634            // Break statement
635            statements.push(Statement::Break);
636            CXChildVisit_Continue
637        }
638        CXCursor_ContinueStmt => {
639            // Continue statement
640            statements.push(Statement::Continue);
641            CXChildVisit_Continue
642        }
643        CXCursor_UnaryOperator => {
644            // Could be ++/-- statement (ptr++, ++ptr, ptr--, --ptr)
645            if let Some(stmt) = extract_inc_dec_stmt(cursor) {
646                statements.push(stmt);
647            }
648            CXChildVisit_Continue
649        }
650        CXCursor_CompoundAssignOperator => {
651            // Compound assignment (+=, -=, *=, /=, %=)
652            if let Some(stmt) = extract_compound_assignment_stmt(cursor) {
653                statements.push(stmt);
654            }
655            CXChildVisit_Continue
656        }
657        _ => CXChildVisit_Recurse, // Recurse into unknown nodes to find statements
658    }
659}
660
661/// Extract a variable declaration statement.
662fn extract_var_decl(cursor: CXCursor) -> Option<Statement> {
663    // Get variable name
664    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
665    let name = unsafe {
666        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
667        let name = c_str.to_string_lossy().into_owned();
668        clang_disposeString(name_cxstring);
669        name
670    };
671
672    // Get variable type
673    let cx_type = unsafe { clang_getCursorType(cursor) };
674    let var_type = convert_type(cx_type)?;
675
676    // Extract initializer by visiting children
677    let mut initializer: Option<Expression> = None;
678    let init_ptr = &mut initializer as *mut Option<Expression>;
679
680    unsafe {
681        clang_visitChildren(cursor, visit_expression, init_ptr as CXClientData);
682    }
683
684    Some(Statement::VariableDeclaration {
685        name,
686        var_type,
687        initializer,
688    })
689}
690
691/// Extract a return statement.
692fn extract_return_stmt(cursor: CXCursor) -> Option<Statement> {
693    // Extract return expression by visiting children
694    let mut return_expr: Option<Expression> = None;
695    let expr_ptr = &mut return_expr as *mut Option<Expression>;
696
697    unsafe {
698        clang_visitChildren(cursor, visit_expression, expr_ptr as CXClientData);
699    }
700
701    Some(Statement::Return(return_expr))
702}
703
704/// Extract an assignment statement.
705fn extract_assignment_stmt(cursor: CXCursor) -> Option<Statement> {
706    // Check if this binary operator is an assignment '=' (not '==', '!=', etc.)
707    // Get the translation unit
708    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
709    if tu.is_null() {
710        return None;
711    }
712
713    // Get the extent (source range) of the cursor
714    let extent = unsafe { clang_getCursorExtent(cursor) };
715
716    // Tokenize to find the operator
717    let mut tokens = ptr::null_mut();
718    let mut num_tokens = 0;
719
720    unsafe {
721        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
722    }
723
724    let mut is_assignment = false;
725
726    // Look through tokens to find '=' (and make sure it's not '==', '!=', etc.)
727    for i in 0..num_tokens {
728        unsafe {
729            let token = *tokens.add(i as usize);
730            let token_kind = clang_getTokenKind(token);
731
732            if token_kind == CXToken_Punctuation {
733                let token_cxstring = clang_getTokenSpelling(tu, token);
734                let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
735                if let Ok(token_str) = c_str.to_str() {
736                    // Only accept single '=' for assignment
737                    if token_str == "=" {
738                        is_assignment = true;
739                        clang_disposeString(token_cxstring);
740                        break;
741                    } else if token_str == "=="
742                        || token_str == "!="
743                        || token_str == "<="
744                        || token_str == ">="
745                    {
746                        // This is a comparison operator, not assignment
747                        clang_disposeString(token_cxstring);
748                        break;
749                    }
750                }
751                clang_disposeString(token_cxstring);
752            }
753        }
754    }
755
756    unsafe {
757        clang_disposeTokens(tu, tokens, num_tokens);
758    }
759
760    if !is_assignment {
761        return None;
762    }
763
764    // Extract left side (target) and right side (value)
765    let mut operands: Vec<Expression> = Vec::new();
766    let operands_ptr = &mut operands as *mut Vec<Expression>;
767
768    unsafe {
769        clang_visitChildren(cursor, visit_binary_operand, operands_ptr as CXClientData);
770    }
771
772    // Assignment should have exactly 2 operands
773    if operands.len() != 2 {
774        return None;
775    }
776
777    // Check if left side is a dereference (e.g., *dst = x)
778    if let Expression::Dereference(inner) = &operands[0] {
779        return Some(Statement::DerefAssignment {
780            target: (**inner).clone(), // Extract the inner expression being dereferenced
781            value: operands[1].clone(),
782        });
783    }
784
785    // Check if left side is an array index (e.g., arr[i] = value)
786    if let Expression::ArrayIndex { array, index } = &operands[0] {
787        return Some(Statement::ArrayIndexAssignment {
788            array: array.clone(),
789            index: index.clone(),
790            value: operands[1].clone(),
791        });
792    }
793
794    // Check if left side is a field access (e.g., ptr->field = value or obj.field = value)
795    if matches!(
796        &operands[0],
797        Expression::PointerFieldAccess { .. } | Expression::FieldAccess { .. }
798    ) {
799        // Extract field name from the expression
800        let field = match &operands[0] {
801            Expression::PointerFieldAccess { field, .. } => field.clone(),
802            Expression::FieldAccess { field, .. } => field.clone(),
803            _ => unreachable!(),
804        };
805
806        // Extract object from the expression
807        let object = match &operands[0] {
808            Expression::PointerFieldAccess { pointer, .. } => (**pointer).clone(),
809            Expression::FieldAccess { object, .. } => (**object).clone(),
810            _ => unreachable!(),
811        };
812
813        return Some(Statement::FieldAssignment {
814            object,
815            field,
816            value: operands[1].clone(),
817        });
818    }
819
820    // Left side must be a variable reference for regular assignment
821    let target = match &operands[0] {
822        Expression::Variable(name) => name.clone(),
823        _ => return None, // Can't assign to non-variables (yet)
824    };
825
826    Some(Statement::Assignment {
827        target,
828        value: operands[1].clone(),
829    })
830}
831
832/// Extract an increment/decrement statement (++, --).
833fn extract_inc_dec_stmt(cursor: CXCursor) -> Option<Statement> {
834    // Get the translation unit
835    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
836    if tu.is_null() {
837        return None;
838    }
839
840    // Get the extent (source range) of the cursor
841    let extent = unsafe { clang_getCursorExtent(cursor) };
842
843    // Tokenize to find the operator
844    let mut tokens = ptr::null_mut();
845    let mut num_tokens = 0;
846
847    unsafe {
848        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
849    }
850
851    let mut operator: Option<String> = None;
852    let mut operator_position = 0;
853
854    // Look through tokens to find ++ or --
855    for i in 0..num_tokens {
856        unsafe {
857            let token = *tokens.add(i as usize);
858            let token_kind = clang_getTokenKind(token);
859
860            if token_kind == CXToken_Punctuation {
861                let token_cxstring = clang_getTokenSpelling(tu, token);
862                let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
863                if let Ok(token_str) = c_str.to_str() {
864                    if token_str == "++" || token_str == "--" {
865                        operator = Some(token_str.to_string()); // Clone the string before disposing
866                        operator_position = i;
867                        clang_disposeString(token_cxstring);
868                        break;
869                    }
870                }
871                clang_disposeString(token_cxstring);
872            }
873        }
874    }
875
876    // Determine if this is pre or post increment/decrement
877    // If operator comes before identifier, it's pre (++ptr)
878    // If operator comes after identifier, it's post (ptr++)
879    let is_pre = operator_position == 0;
880
881    unsafe {
882        clang_disposeTokens(tu, tokens, num_tokens);
883    }
884
885    // Extract the target variable name by visiting children
886    let mut target_name: Option<String> = None;
887
888    // Visit children to find the DeclRefExpr (variable reference)
889    extern "C" fn visit_for_target(
890        cursor: CXCursor,
891        _parent: CXCursor,
892        client_data: CXClientData,
893    ) -> CXChildVisitResult {
894        let target = unsafe { &mut *(client_data as *mut Option<String>) };
895        let kind = unsafe { clang_getCursorKind(cursor) };
896
897        if kind == CXCursor_DeclRefExpr {
898            let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
899            let name = unsafe {
900                let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
901                let var_name = c_str.to_string_lossy().into_owned();
902                clang_disposeString(name_cxstring);
903                var_name
904            };
905            *target = Some(name);
906            CXChildVisit_Break
907        } else {
908            CXChildVisit_Recurse
909        }
910    }
911
912    let target_ptr = &mut target_name as *mut Option<String>;
913    unsafe {
914        clang_visitChildren(cursor, visit_for_target, target_ptr as CXClientData);
915    }
916
917    let target = target_name?;
918
919    match operator?.as_str() {
920        "++" => {
921            if is_pre {
922                Some(Statement::PreIncrement { target })
923            } else {
924                Some(Statement::PostIncrement { target })
925            }
926        }
927        "--" => {
928            if is_pre {
929                Some(Statement::PreDecrement { target })
930            } else {
931                Some(Statement::PostDecrement { target })
932            }
933        }
934        _ => None,
935    }
936}
937
938/// Extract a compound assignment statement (+=, -=, *=, /=, %=).
939fn extract_compound_assignment_stmt(cursor: CXCursor) -> Option<Statement> {
940    // Get the translation unit
941    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
942    if tu.is_null() {
943        return None;
944    }
945
946    // Get the extent (source range) of the cursor
947    let extent = unsafe { clang_getCursorExtent(cursor) };
948
949    // Tokenize to find the operator
950    let mut tokens = ptr::null_mut();
951    let mut num_tokens = 0;
952
953    unsafe {
954        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
955    }
956
957    let mut operator: Option<BinaryOperator> = None;
958
959    // Look through tokens to find compound assignment operator
960    for i in 0..num_tokens {
961        unsafe {
962            let token = *tokens.add(i as usize);
963            let token_kind = clang_getTokenKind(token);
964
965            if token_kind == CXToken_Punctuation {
966                let token_cxstring = clang_getTokenSpelling(tu, token);
967                let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
968                if let Ok(token_str) = c_str.to_str() {
969                    operator = match token_str {
970                        "+=" => Some(BinaryOperator::Add),
971                        "-=" => Some(BinaryOperator::Subtract),
972                        "*=" => Some(BinaryOperator::Multiply),
973                        "/=" => Some(BinaryOperator::Divide),
974                        "%=" => Some(BinaryOperator::Modulo),
975                        _ => None,
976                    };
977                    if operator.is_some() {
978                        clang_disposeString(token_cxstring);
979                        break;
980                    }
981                }
982                clang_disposeString(token_cxstring);
983            }
984        }
985    }
986
987    unsafe {
988        clang_disposeTokens(tu, tokens, num_tokens);
989    }
990
991    let op = operator?;
992
993    // Extract left side (target) and right side (value)
994    let mut operands: Vec<Expression> = Vec::new();
995    let operands_ptr = &mut operands as *mut Vec<Expression>;
996
997    unsafe {
998        clang_visitChildren(cursor, visit_binary_operand, operands_ptr as CXClientData);
999    }
1000
1001    // Compound assignment should have exactly 2 operands
1002    if operands.len() != 2 {
1003        return None;
1004    }
1005
1006    // Left side must be a variable reference
1007    let target = match &operands[0] {
1008        Expression::Variable(name) => name.clone(),
1009        _ => return None,
1010    };
1011
1012    Some(Statement::CompoundAssignment {
1013        target,
1014        op,
1015        value: operands[1].clone(),
1016    })
1017}
1018
1019/// Extract an if statement.
1020fn extract_if_stmt(cursor: CXCursor) -> Option<Statement> {
1021    // An if statement has 2 or 3 children:
1022    // 1. Condition expression
1023    // 2. Then block (compound statement)
1024    // 3. Else block (optional compound statement)
1025
1026    #[repr(C)]
1027    struct IfData {
1028        condition: Option<Expression>,
1029        then_block: Vec<Statement>,
1030        else_block: Option<Vec<Statement>>,
1031        child_index: u32,
1032    }
1033
1034    let mut if_data = IfData {
1035        condition: None,
1036        then_block: Vec::new(),
1037        else_block: None,
1038        child_index: 0,
1039    };
1040
1041    let data_ptr = &mut if_data as *mut IfData;
1042
1043    unsafe {
1044        clang_visitChildren(cursor, visit_if_children, data_ptr as CXClientData);
1045    }
1046
1047    Some(Statement::If {
1048        condition: if_data.condition?,
1049        then_block: if_data.then_block,
1050        else_block: if_data.else_block,
1051    })
1052}
1053
1054/// Visitor for if statement children.
1055#[allow(non_upper_case_globals)]
1056extern "C" fn visit_if_children(
1057    cursor: CXCursor,
1058    _parent: CXCursor,
1059    client_data: CXClientData,
1060) -> CXChildVisitResult {
1061    #[repr(C)]
1062    struct IfData {
1063        condition: Option<Expression>,
1064        then_block: Vec<Statement>,
1065        else_block: Option<Vec<Statement>>,
1066        child_index: u32,
1067    }
1068
1069    let if_data = unsafe { &mut *(client_data as *mut IfData) };
1070    let kind = unsafe { clang_getCursorKind(cursor) };
1071
1072    match if_data.child_index {
1073        0 => {
1074            // First child: condition expression
1075            // The cursor itself IS the condition, extract it directly
1076            if_data.condition = match kind {
1077                CXCursor_BinaryOperator => extract_binary_op(cursor),
1078                CXCursor_IntegerLiteral => extract_int_literal(cursor),
1079                CXCursor_DeclRefExpr => extract_variable_ref(cursor),
1080                CXCursor_CallExpr => extract_function_call(cursor),
1081                CXCursor_UnaryOperator => extract_unary_op(cursor),
1082                _ => {
1083                    // For other expression types, try visiting children
1084                    let mut cond_expr: Option<Expression> = None;
1085                    let expr_ptr = &mut cond_expr as *mut Option<Expression>;
1086                    unsafe {
1087                        clang_visitChildren(cursor, visit_expression, expr_ptr as CXClientData);
1088                    }
1089                    cond_expr
1090                }
1091            };
1092            if_data.child_index += 1;
1093            CXChildVisit_Continue
1094        }
1095        1 => {
1096            // Second child: then block
1097            if kind == CXCursor_CompoundStmt {
1098                let body_ptr = &mut if_data.then_block as *mut Vec<Statement>;
1099                unsafe {
1100                    clang_visitChildren(cursor, visit_statement, body_ptr as CXClientData);
1101                }
1102            }
1103            if_data.child_index += 1;
1104            CXChildVisit_Continue
1105        }
1106        2 => {
1107            // Third child (optional): else block
1108            if kind == CXCursor_CompoundStmt || kind == CXCursor_IfStmt {
1109                let mut else_stmts = Vec::new();
1110                let body_ptr = &mut else_stmts as *mut Vec<Statement>;
1111                unsafe {
1112                    clang_visitChildren(cursor, visit_statement, body_ptr as CXClientData);
1113                }
1114                if_data.else_block = Some(else_stmts);
1115            }
1116            if_data.child_index += 1;
1117            CXChildVisit_Continue
1118        }
1119        _ => CXChildVisit_Continue,
1120    }
1121}
1122
1123/// Extract a for loop statement.
1124fn extract_for_stmt(cursor: CXCursor) -> Option<Statement> {
1125    // A for loop has up to 4 children:
1126    // 1. Init statement (optional - could be DeclStmt or expression)
1127    // 2. Condition expression (optional)
1128    // 3. Increment expression (optional)
1129    // 4. Body (compound statement)
1130
1131    #[repr(C)]
1132    struct ForData {
1133        init: Option<Box<Statement>>,
1134        condition: Option<Expression>,
1135        increment: Option<Box<Statement>>,
1136        body: Vec<Statement>,
1137        child_index: u32,
1138    }
1139
1140    let mut for_data = ForData {
1141        init: None,
1142        condition: None,
1143        increment: None,
1144        body: Vec::new(),
1145        child_index: 0,
1146    };
1147
1148    let data_ptr = &mut for_data as *mut ForData;
1149
1150    unsafe {
1151        clang_visitChildren(cursor, visit_for_children, data_ptr as CXClientData);
1152    }
1153
1154    Some(Statement::For {
1155        init: for_data.init,
1156        condition: for_data.condition,
1157        increment: for_data.increment,
1158        body: for_data.body,
1159    })
1160}
1161
1162/// Visitor for for loop children.
1163#[allow(non_upper_case_globals)]
1164extern "C" fn visit_for_children(
1165    cursor: CXCursor,
1166    _parent: CXCursor,
1167    client_data: CXClientData,
1168) -> CXChildVisitResult {
1169    #[repr(C)]
1170    struct ForData {
1171        init: Option<Box<Statement>>,
1172        condition: Option<Expression>,
1173        increment: Option<Box<Statement>>,
1174        body: Vec<Statement>,
1175        child_index: u32,
1176    }
1177
1178    let for_data = unsafe { &mut *(client_data as *mut ForData) };
1179    let kind = unsafe { clang_getCursorKind(cursor) };
1180
1181    match for_data.child_index {
1182        0 => {
1183            // First child: init statement (could be DeclStmt or NULL)
1184            if kind == CXCursor_DeclStmt {
1185                // Visit to get the variable declaration
1186                let mut init_stmts = Vec::new();
1187                let ptr = &mut init_stmts as *mut Vec<Statement>;
1188                unsafe {
1189                    clang_visitChildren(cursor, visit_statement, ptr as CXClientData);
1190                }
1191                if let Some(stmt) = init_stmts.into_iter().next() {
1192                    for_data.init = Some(Box::new(stmt));
1193                }
1194            } else if kind == CXCursor_BinaryOperator {
1195                // Assignment in init
1196                if let Some(stmt) = extract_assignment_stmt(cursor) {
1197                    for_data.init = Some(Box::new(stmt));
1198                }
1199            }
1200            for_data.child_index += 1;
1201            CXChildVisit_Continue
1202        }
1203        1 => {
1204            // Second child: condition expression
1205            // The cursor itself IS the condition, extract it directly
1206            for_data.condition = match kind {
1207                CXCursor_BinaryOperator => extract_binary_op(cursor),
1208                CXCursor_IntegerLiteral => extract_int_literal(cursor),
1209                CXCursor_DeclRefExpr => extract_variable_ref(cursor),
1210                CXCursor_CallExpr => extract_function_call(cursor),
1211                CXCursor_UnaryOperator => extract_unary_op(cursor),
1212                _ => {
1213                    let mut cond_expr: Option<Expression> = None;
1214                    let expr_ptr = &mut cond_expr as *mut Option<Expression>;
1215                    unsafe {
1216                        clang_visitChildren(cursor, visit_expression, expr_ptr as CXClientData);
1217                    }
1218                    cond_expr
1219                }
1220            };
1221            for_data.child_index += 1;
1222            CXChildVisit_Continue
1223        }
1224        2 => {
1225            // Third child: increment statement
1226            if kind == CXCursor_BinaryOperator {
1227                if let Some(stmt) = extract_assignment_stmt(cursor) {
1228                    for_data.increment = Some(Box::new(stmt));
1229                }
1230            } else if kind == CXCursor_UnaryOperator {
1231                // Handle ++/-- in increment position
1232                if let Some(stmt) = extract_inc_dec_stmt(cursor) {
1233                    for_data.increment = Some(Box::new(stmt));
1234                }
1235            }
1236            for_data.child_index += 1;
1237            CXChildVisit_Continue
1238        }
1239        3 => {
1240            // Fourth child: body
1241            if kind == CXCursor_CompoundStmt {
1242                let body_ptr = &mut for_data.body as *mut Vec<Statement>;
1243                unsafe {
1244                    clang_visitChildren(cursor, visit_statement, body_ptr as CXClientData);
1245                }
1246            }
1247            for_data.child_index += 1;
1248            CXChildVisit_Continue
1249        }
1250        _ => CXChildVisit_Continue,
1251    }
1252}
1253
1254/// Extract a while loop statement.
1255fn extract_while_stmt(cursor: CXCursor) -> Option<Statement> {
1256    // A while loop has 2 children:
1257    // 1. Condition expression
1258    // 2. Body (compound statement)
1259
1260    #[repr(C)]
1261    struct WhileData {
1262        condition: Option<Expression>,
1263        body: Vec<Statement>,
1264        child_index: u32,
1265    }
1266
1267    let mut while_data = WhileData {
1268        condition: None,
1269        body: Vec::new(),
1270        child_index: 0,
1271    };
1272
1273    let data_ptr = &mut while_data as *mut WhileData;
1274
1275    unsafe {
1276        clang_visitChildren(cursor, visit_while_children, data_ptr as CXClientData);
1277    }
1278
1279    Some(Statement::While {
1280        condition: while_data.condition?,
1281        body: while_data.body,
1282    })
1283}
1284
1285/// Visitor for while loop children.
1286#[allow(non_upper_case_globals)]
1287extern "C" fn visit_while_children(
1288    cursor: CXCursor,
1289    _parent: CXCursor,
1290    client_data: CXClientData,
1291) -> CXChildVisitResult {
1292    #[repr(C)]
1293    struct WhileData {
1294        condition: Option<Expression>,
1295        body: Vec<Statement>,
1296        child_index: u32,
1297    }
1298
1299    let while_data = unsafe { &mut *(client_data as *mut WhileData) };
1300    let kind = unsafe { clang_getCursorKind(cursor) };
1301
1302    match while_data.child_index {
1303        0 => {
1304            // First child: condition expression
1305            // The cursor itself IS the condition, extract it directly
1306            while_data.condition = match kind {
1307                CXCursor_BinaryOperator => extract_binary_op(cursor),
1308                CXCursor_IntegerLiteral => extract_int_literal(cursor),
1309                CXCursor_DeclRefExpr => extract_variable_ref(cursor),
1310                CXCursor_CallExpr => extract_function_call(cursor),
1311                CXCursor_UnaryOperator => extract_unary_op(cursor),
1312                _ => {
1313                    let mut cond_expr: Option<Expression> = None;
1314                    let expr_ptr = &mut cond_expr as *mut Option<Expression>;
1315                    unsafe {
1316                        clang_visitChildren(cursor, visit_expression, expr_ptr as CXClientData);
1317                    }
1318                    cond_expr
1319                }
1320            };
1321            while_data.child_index += 1;
1322            CXChildVisit_Continue
1323        }
1324        1 => {
1325            // Second child: body
1326            if kind == CXCursor_CompoundStmt {
1327                let body_ptr = &mut while_data.body as *mut Vec<Statement>;
1328                unsafe {
1329                    clang_visitChildren(cursor, visit_statement, body_ptr as CXClientData);
1330                }
1331            }
1332            while_data.child_index += 1;
1333            CXChildVisit_Continue
1334        }
1335        _ => CXChildVisit_Continue,
1336    }
1337}
1338
1339/// Extract a switch statement from a cursor.
1340///
1341/// Parses switch statements including cases and default labels.
1342#[allow(non_upper_case_globals)]
1343fn extract_switch_stmt(cursor: CXCursor) -> Option<Statement> {
1344    // Switch has 2 children:
1345    // 1. Condition expression
1346    // 2. Body (compound statement containing case/default labels)
1347
1348    #[repr(C)]
1349    struct SwitchData {
1350        condition: Option<Expression>,
1351        cases: Vec<SwitchCase>,
1352        default_case: Option<Vec<Statement>>,
1353        child_index: u32,
1354    }
1355
1356    let mut switch_data = SwitchData {
1357        condition: None,
1358        cases: Vec::new(),
1359        default_case: None,
1360        child_index: 0,
1361    };
1362
1363    let data_ptr = &mut switch_data as *mut SwitchData;
1364
1365    unsafe {
1366        clang_visitChildren(cursor, visit_switch_children, data_ptr as CXClientData);
1367    }
1368
1369    Some(Statement::Switch {
1370        condition: switch_data.condition?,
1371        cases: switch_data.cases,
1372        default_case: switch_data.default_case,
1373    })
1374}
1375
1376/// Visitor callback for switch statement children (condition and body).
1377#[allow(non_upper_case_globals)]
1378extern "C" fn visit_switch_children(
1379    cursor: CXCursor,
1380    _parent: CXCursor,
1381    client_data: CXClientData,
1382) -> CXChildVisitResult {
1383    #[repr(C)]
1384    struct SwitchData {
1385        condition: Option<Expression>,
1386        cases: Vec<SwitchCase>,
1387        default_case: Option<Vec<Statement>>,
1388        child_index: u32,
1389    }
1390
1391    let switch_data = unsafe { &mut *(client_data as *mut SwitchData) };
1392    let kind = unsafe { clang_getCursorKind(cursor) };
1393
1394    match switch_data.child_index {
1395        0 => {
1396            // First child: condition expression
1397            if let Some(expr) = try_extract_expression(cursor) {
1398                switch_data.condition = Some(expr);
1399            }
1400            switch_data.child_index += 1;
1401            CXChildVisit_Continue
1402        }
1403        1 => {
1404            // Second child: compound statement body containing cases
1405            // Need to visit this recursively to find case/default labels
1406            if kind == CXCursor_CompoundStmt {
1407                unsafe {
1408                    clang_visitChildren(cursor, visit_switch_body, client_data);
1409                }
1410            }
1411            switch_data.child_index += 1;
1412            CXChildVisit_Continue
1413        }
1414        _ => CXChildVisit_Continue,
1415    }
1416}
1417
1418/// Visitor callback for switch body to extract cases and default.
1419#[allow(non_upper_case_globals)]
1420extern "C" fn visit_switch_body(
1421    cursor: CXCursor,
1422    _parent: CXCursor,
1423    client_data: CXClientData,
1424) -> CXChildVisitResult {
1425    #[repr(C)]
1426    struct SwitchData {
1427        condition: Option<Expression>,
1428        cases: Vec<SwitchCase>,
1429        default_case: Option<Vec<Statement>>,
1430        child_index: u32,
1431    }
1432
1433    let switch_data = unsafe { &mut *(client_data as *mut SwitchData) };
1434    let kind = unsafe { clang_getCursorKind(cursor) };
1435
1436    match kind {
1437        CXCursor_CaseStmt => {
1438            // Extract case statement
1439            if let Some(case) = extract_case_stmt(cursor) {
1440                switch_data.cases.push(case);
1441            }
1442            CXChildVisit_Continue
1443        }
1444        CXCursor_DefaultStmt => {
1445            // Extract default statement
1446            if let Some(body) = extract_default_stmt(cursor) {
1447                switch_data.default_case = Some(body);
1448            }
1449            CXChildVisit_Continue
1450        }
1451        _ => CXChildVisit_Continue,
1452    }
1453}
1454
1455/// Extract a case statement from a cursor.
1456fn extract_case_stmt(cursor: CXCursor) -> Option<SwitchCase> {
1457    // Case statement has 2 children:
1458    // 1. Case value expression
1459    // 2. Body (statements following the case label)
1460
1461    #[repr(C)]
1462    struct CaseData {
1463        value: Option<Expression>,
1464        body: Vec<Statement>,
1465        child_index: u32,
1466    }
1467
1468    let mut case_data = CaseData {
1469        value: None,
1470        body: Vec::new(),
1471        child_index: 0,
1472    };
1473
1474    let data_ptr = &mut case_data as *mut CaseData;
1475
1476    unsafe {
1477        clang_visitChildren(cursor, visit_case_children, data_ptr as CXClientData);
1478    }
1479
1480    Some(SwitchCase {
1481        value: case_data.value,
1482        body: case_data.body,
1483    })
1484}
1485
1486/// Visitor for case statement children.
1487#[allow(non_upper_case_globals)]
1488extern "C" fn visit_case_children(
1489    cursor: CXCursor,
1490    _parent: CXCursor,
1491    client_data: CXClientData,
1492) -> CXChildVisitResult {
1493    #[repr(C)]
1494    struct CaseData {
1495        value: Option<Expression>,
1496        body: Vec<Statement>,
1497        child_index: u32,
1498    }
1499
1500    let case_data = unsafe { &mut *(client_data as *mut CaseData) };
1501    let _kind = unsafe { clang_getCursorKind(cursor) };
1502
1503    match case_data.child_index {
1504        0 => {
1505            // First child: case value expression
1506            if let Some(expr) = try_extract_expression(cursor) {
1507                case_data.value = Some(expr);
1508            }
1509            case_data.child_index += 1;
1510            CXChildVisit_Continue
1511        }
1512        _ => {
1513            // Subsequent children: statements in case body
1514            // Extract statements until we hit another case or default
1515            if let Some(stmt) = extract_statement(cursor) {
1516                case_data.body.push(stmt);
1517            }
1518            // Continue recursing to find all statements in the case body
1519            CXChildVisit_Recurse
1520        }
1521    }
1522}
1523
1524/// Extract a default statement from a cursor.
1525fn extract_default_stmt(cursor: CXCursor) -> Option<Vec<Statement>> {
1526    // Default statement has body statements as children
1527    let mut body: Vec<Statement> = Vec::new();
1528    let body_ptr = &mut body as *mut Vec<Statement>;
1529
1530    unsafe {
1531        clang_visitChildren(cursor, visit_default_children, body_ptr as CXClientData);
1532    }
1533
1534    Some(body)
1535}
1536
1537/// Visitor for default statement children.
1538#[allow(non_upper_case_globals)]
1539extern "C" fn visit_default_children(
1540    cursor: CXCursor,
1541    _parent: CXCursor,
1542    client_data: CXClientData,
1543) -> CXChildVisitResult {
1544    let body = unsafe { &mut *(client_data as *mut Vec<Statement>) };
1545
1546    // Extract all statements in default body
1547    if let Some(stmt) = extract_statement(cursor) {
1548        body.push(stmt);
1549    }
1550
1551    CXChildVisit_Continue
1552}
1553
1554/// Helper function to extract a statement from a cursor based on its kind.
1555#[allow(non_upper_case_globals)]
1556fn extract_statement(cursor: CXCursor) -> Option<Statement> {
1557    let kind = unsafe { clang_getCursorKind(cursor) };
1558
1559    match kind {
1560        CXCursor_ReturnStmt => extract_return_stmt(cursor),
1561        CXCursor_VarDecl => extract_var_decl(cursor),
1562        CXCursor_IfStmt => extract_if_stmt(cursor),
1563        CXCursor_ForStmt => extract_for_stmt(cursor),
1564        CXCursor_WhileStmt => extract_while_stmt(cursor),
1565        CXCursor_BreakStmt => Some(Statement::Break),
1566        CXCursor_ContinueStmt => Some(Statement::Continue),
1567        CXCursor_UnaryOperator => extract_inc_dec_stmt(cursor),
1568        CXCursor_BinaryOperator => extract_assignment_stmt(cursor),
1569        CXCursor_CallExpr => {
1570            // Function call as statement
1571            if let Some(Expression::FunctionCall {
1572                function,
1573                arguments,
1574            }) = extract_function_call(cursor)
1575            {
1576                return Some(Statement::FunctionCall {
1577                    function,
1578                    arguments,
1579                });
1580            }
1581            None
1582        }
1583        _ => None,
1584    }
1585}
1586
1587/// Visitor callback for extracting expressions.
1588///
1589/// # Safety
1590///
1591/// This function is called by clang_visitChildren and must follow C calling conventions.
1592#[allow(non_upper_case_globals)]
1593extern "C" fn visit_expression(
1594    cursor: CXCursor,
1595    _parent: CXCursor,
1596    client_data: CXClientData,
1597) -> CXChildVisitResult {
1598    // SAFETY: Converting client data back to expression option pointer
1599    let expr_opt = unsafe { &mut *(client_data as *mut Option<Expression>) };
1600
1601    // SAFETY: Getting cursor kind
1602    let kind = unsafe { clang_getCursorKind(cursor) };
1603
1604    match kind {
1605        CXCursor_IntegerLiteral => {
1606            // Integer literal
1607            if let Some(expr) = extract_int_literal(cursor) {
1608                *expr_opt = Some(expr);
1609            }
1610            CXChildVisit_Continue
1611        }
1612        CXCursor_StringLiteral => {
1613            // String literal
1614            if let Some(expr) = extract_string_literal(cursor) {
1615                *expr_opt = Some(expr);
1616            }
1617            CXChildVisit_Continue
1618        }
1619        CXCursor_DeclRefExpr => {
1620            // Variable reference (e.g., "a" or "b" in "a + b")
1621            if let Some(expr) = extract_variable_ref(cursor) {
1622                *expr_opt = Some(expr);
1623            }
1624            CXChildVisit_Continue
1625        }
1626        CXCursor_BinaryOperator => {
1627            // Binary operation (e.g., a + b)
1628            if let Some(expr) = extract_binary_op(cursor) {
1629                *expr_opt = Some(expr);
1630            }
1631            CXChildVisit_Continue
1632        }
1633        CXCursor_CallExpr => {
1634            // Function call
1635            if let Some(expr) = extract_function_call(cursor) {
1636                *expr_opt = Some(expr);
1637            }
1638            CXChildVisit_Continue
1639        }
1640        CXCursor_UnaryOperator => {
1641            // Unary operator (e.g., *ptr dereference)
1642            if let Some(expr) = extract_unary_op(cursor) {
1643                *expr_opt = Some(expr);
1644            }
1645            CXChildVisit_Continue
1646        }
1647        CXCursor_ArraySubscriptExpr => {
1648            // Array indexing (e.g., arr[i])
1649            if let Some(expr) = extract_array_index(cursor) {
1650                *expr_opt = Some(expr);
1651            }
1652            CXChildVisit_Continue
1653        }
1654        CXCursor_MemberRefExpr => {
1655            // Field access (e.g., ptr->field or obj.field)
1656            if let Some(expr) = extract_field_access(cursor) {
1657                *expr_opt = Some(expr);
1658            }
1659            CXChildVisit_Continue
1660        }
1661        CXCursor_UnexposedExpr => {
1662            // Unexposed expressions might wrap other expressions (like ImplicitCastExpr wrapping CallExpr)
1663            // Recurse first to check if there's a more specific expression inside
1664            CXChildVisit_Recurse
1665        }
1666        CXCursor_ParenExpr => {
1667            // Parenthesized expressions wrap other expressions, recurse
1668            CXChildVisit_Recurse
1669        }
1670        136 => {
1671            // CXCursor_UnaryExpr - could be sizeof or other unary expr
1672            if let Some(expr) = extract_sizeof(cursor) {
1673                *expr_opt = Some(expr);
1674                CXChildVisit_Continue
1675            } else {
1676                // Not sizeof, recurse for other unary expressions
1677                CXChildVisit_Recurse
1678            }
1679        }
1680        _ => CXChildVisit_Recurse,
1681    }
1682}
1683
1684/// Extract an integer literal expression.
1685fn extract_int_literal(cursor: CXCursor) -> Option<Expression> {
1686    // SAFETY: Get the extent (source range) of the cursor
1687    let extent = unsafe { clang_getCursorExtent(cursor) };
1688
1689    // SAFETY: Get the translation unit from the cursor
1690    let tu = unsafe {
1691        let loc = clang_getCursorLocation(cursor);
1692        let mut file = ptr::null_mut();
1693        let mut line = 0;
1694        let mut column = 0;
1695        let mut offset = 0;
1696        clang_getFileLocation(loc, &mut file, &mut line, &mut column, &mut offset);
1697
1698        // Get the translation unit containing this cursor
1699        // We need to traverse up to get it, but for now use a different approach
1700        clang_Cursor_getTranslationUnit(cursor)
1701    };
1702
1703    if tu.is_null() {
1704        return Some(Expression::IntLiteral(0));
1705    }
1706
1707    // SAFETY: Tokenize the extent
1708    let mut tokens = ptr::null_mut();
1709    let mut num_tokens = 0;
1710
1711    unsafe {
1712        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
1713    }
1714
1715    let mut value = 0;
1716
1717    if num_tokens > 0 {
1718        // SAFETY: Get the spelling of the first token
1719        unsafe {
1720            let token_cxstring = clang_getTokenSpelling(tu, *tokens);
1721            let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
1722            if let Ok(token_str) = c_str.to_str() {
1723                value = token_str.parse().unwrap_or(0);
1724            }
1725            clang_disposeString(token_cxstring);
1726
1727            // SAFETY: Dispose tokens
1728            clang_disposeTokens(tu, tokens, num_tokens);
1729        }
1730    }
1731
1732    Some(Expression::IntLiteral(value))
1733}
1734
1735/// Extract a string literal expression.
1736fn extract_string_literal(cursor: CXCursor) -> Option<Expression> {
1737    // SAFETY: Get the extent (source range) of the cursor
1738    let extent = unsafe { clang_getCursorExtent(cursor) };
1739
1740    // SAFETY: Get the translation unit from the cursor
1741    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
1742
1743    if tu.is_null() {
1744        return Some(Expression::StringLiteral(String::new()));
1745    }
1746
1747    // SAFETY: Tokenize the extent
1748    let mut tokens = ptr::null_mut();
1749    let mut num_tokens = 0;
1750
1751    unsafe {
1752        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
1753    }
1754
1755    let mut value = String::new();
1756
1757    if num_tokens > 0 {
1758        // SAFETY: Get the spelling of the first token
1759        unsafe {
1760            let token_cxstring = clang_getTokenSpelling(tu, *tokens);
1761            let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
1762            if let Ok(token_str) = c_str.to_str() {
1763                // Remove surrounding quotes from string literal
1764                value = token_str.trim_matches('"').to_string();
1765            }
1766            clang_disposeString(token_cxstring);
1767
1768            // SAFETY: Dispose tokens
1769            clang_disposeTokens(tu, tokens, num_tokens);
1770        }
1771    }
1772
1773    Some(Expression::StringLiteral(value))
1774}
1775
1776/// Extract a variable reference expression.
1777fn extract_variable_ref(cursor: CXCursor) -> Option<Expression> {
1778    // Get variable name
1779    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
1780    let name = unsafe {
1781        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
1782        let var_name = c_str.to_string_lossy().into_owned();
1783        clang_disposeString(name_cxstring);
1784        var_name
1785    };
1786
1787    Some(Expression::Variable(name))
1788}
1789
1790/// Extract a binary operation expression.
1791fn extract_binary_op(cursor: CXCursor) -> Option<Expression> {
1792    // Extract operator by tokenizing
1793    let op = extract_binary_operator(cursor)?;
1794
1795    // Extract left and right operands by visiting children
1796    let mut operands: Vec<Expression> = Vec::new();
1797    let operands_ptr = &mut operands as *mut Vec<Expression>;
1798
1799    unsafe {
1800        clang_visitChildren(cursor, visit_binary_operand, operands_ptr as CXClientData);
1801    }
1802
1803    // Binary operators should have exactly 2 operands
1804    if operands.len() != 2 {
1805        return None;
1806    }
1807
1808    Some(Expression::BinaryOp {
1809        op,
1810        left: Box::new(operands[0].clone()),
1811        right: Box::new(operands[1].clone()),
1812    })
1813}
1814
1815/// Visitor callback for binary operator operands.
1816#[allow(non_upper_case_globals)]
1817extern "C" fn visit_binary_operand(
1818    cursor: CXCursor,
1819    _parent: CXCursor,
1820    client_data: CXClientData,
1821) -> CXChildVisitResult {
1822    let operands = unsafe { &mut *(client_data as *mut Vec<Expression>) };
1823    let kind = unsafe { clang_getCursorKind(cursor) };
1824
1825    match kind {
1826        CXCursor_IntegerLiteral => {
1827            if let Some(expr) = extract_int_literal(cursor) {
1828                operands.push(expr);
1829            }
1830            CXChildVisit_Continue
1831        }
1832        CXCursor_StringLiteral => {
1833            if let Some(expr) = extract_string_literal(cursor) {
1834                operands.push(expr);
1835            }
1836            CXChildVisit_Continue
1837        }
1838        CXCursor_DeclRefExpr => {
1839            if let Some(expr) = extract_variable_ref(cursor) {
1840                operands.push(expr);
1841            }
1842            CXChildVisit_Continue
1843        }
1844        CXCursor_BinaryOperator => {
1845            // Nested binary operation
1846            if let Some(expr) = extract_binary_op(cursor) {
1847                operands.push(expr);
1848            }
1849            CXChildVisit_Continue
1850        }
1851        CXCursor_UnaryOperator => {
1852            // Unary operation (e.g., *ptr dereference)
1853            if let Some(expr) = extract_unary_op(cursor) {
1854                operands.push(expr);
1855            }
1856            CXChildVisit_Continue
1857        }
1858        CXCursor_ArraySubscriptExpr => {
1859            // Array indexing (e.g., arr[i])
1860            if let Some(expr) = extract_array_index(cursor) {
1861                operands.push(expr);
1862            }
1863            CXChildVisit_Continue
1864        }
1865        CXCursor_MemberRefExpr => {
1866            // Field access (e.g., ptr->field or obj.field)
1867            if let Some(expr) = extract_field_access(cursor) {
1868                operands.push(expr);
1869            }
1870            CXChildVisit_Continue
1871        }
1872        CXCursor_UnexposedExpr | CXCursor_ParenExpr => {
1873            // Unexposed expressions might be sizeof or wrap other expressions
1874            if let Some(expr) = extract_sizeof(cursor) {
1875                operands.push(expr);
1876                CXChildVisit_Continue
1877            } else {
1878                CXChildVisit_Recurse
1879            }
1880        }
1881        136 => {
1882            // CXCursor_UnaryExpr - includes sizeof, alignof, etc.
1883            if let Some(expr) = extract_sizeof(cursor) {
1884                operands.push(expr);
1885                CXChildVisit_Continue
1886            } else {
1887                CXChildVisit_Recurse
1888            }
1889        }
1890        CXCursor_CallExpr => {
1891            // Function call expression (e.g., malloc(size))
1892            if let Some(expr) = extract_function_call(cursor) {
1893                operands.push(expr);
1894            }
1895            CXChildVisit_Continue
1896        }
1897        _ => CXChildVisit_Recurse,
1898    }
1899}
1900
1901/// Extract the binary operator from a cursor by tokenizing.
1902#[allow(non_upper_case_globals)]
1903fn extract_binary_operator(cursor: CXCursor) -> Option<BinaryOperator> {
1904    // Get the translation unit
1905    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
1906    if tu.is_null() {
1907        return None;
1908    }
1909
1910    // Get the extent (source range) of the cursor
1911    let extent = unsafe { clang_getCursorExtent(cursor) };
1912
1913    // Tokenize to find the operator
1914    let mut tokens = ptr::null_mut();
1915    let mut num_tokens = 0;
1916
1917    unsafe {
1918        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
1919    }
1920
1921    let mut operator = None;
1922
1923    // Look through tokens to find the operator
1924    // For compound expressions like "a > 0 && b > 0", we need to find the LAST
1925    // operator (the one with lowest precedence) which represents THIS binary operation.
1926    // We scan from right to left to find operators with lowest precedence first.
1927    // Precedence (lowest to highest): || > && > == != > < > <= >= > + - > * / %
1928
1929    let mut candidates: Vec<(usize, BinaryOperator)> = Vec::new();
1930    let mut found_first_operand = false;
1931
1932    for i in 0..num_tokens {
1933        unsafe {
1934            let token = *tokens.add(i as usize);
1935            let token_kind = clang_getTokenKind(token);
1936
1937            // Track when we've seen the first operand (identifier or literal)
1938            if token_kind == CXToken_Identifier || token_kind == CXToken_Literal {
1939                found_first_operand = true;
1940            }
1941
1942            // Collect all operator candidates after the first operand
1943            if token_kind == CXToken_Punctuation && found_first_operand {
1944                let token_cxstring = clang_getTokenSpelling(tu, token);
1945                let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
1946                if let Ok(token_str) = c_str.to_str() {
1947                    let op = match token_str {
1948                        "+" => Some(BinaryOperator::Add),
1949                        "-" => Some(BinaryOperator::Subtract),
1950                        "*" => Some(BinaryOperator::Multiply),
1951                        "/" => Some(BinaryOperator::Divide),
1952                        "%" => Some(BinaryOperator::Modulo),
1953                        "==" => Some(BinaryOperator::Equal),
1954                        "!=" => Some(BinaryOperator::NotEqual),
1955                        "<" => Some(BinaryOperator::LessThan),
1956                        ">" => Some(BinaryOperator::GreaterThan),
1957                        "<=" => Some(BinaryOperator::LessEqual),
1958                        ">=" => Some(BinaryOperator::GreaterEqual),
1959                        "&&" => Some(BinaryOperator::LogicalAnd),
1960                        "||" => Some(BinaryOperator::LogicalOr),
1961                        _ => None,
1962                    };
1963                    if let Some(op) = op {
1964                        candidates.push((i as usize, op));
1965                    }
1966                }
1967                clang_disposeString(token_cxstring);
1968            }
1969        }
1970    }
1971
1972    // Select the operator with lowest precedence (appears last in our search)
1973    // This handles cases like "a > 0 && b > 0" where && should be selected over >
1974    if !candidates.is_empty() {
1975        // Priority: || > && > comparisons > arithmetic
1976        // Find the first || operator
1977        for (_, op) in &candidates {
1978            if matches!(op, BinaryOperator::LogicalOr) {
1979                operator = Some(*op);
1980                break;
1981            }
1982        }
1983        // If no ||, find first &&
1984        if operator.is_none() {
1985            for (_, op) in &candidates {
1986                if matches!(op, BinaryOperator::LogicalAnd) {
1987                    operator = Some(*op);
1988                    break;
1989                }
1990            }
1991        }
1992        // If no logical operators, find operator with lowest precedence
1993        // Precedence (lowest to highest): comparisons (==, !=, <, >, <=, >=) > arithmetic (+, -) > multiplicative (*, /, %)
1994        if operator.is_none() {
1995            // Find first comparison operator (==, !=, <, >, <=, >=)
1996            for (_, op) in &candidates {
1997                if matches!(
1998                    op,
1999                    BinaryOperator::Equal
2000                        | BinaryOperator::NotEqual
2001                        | BinaryOperator::LessThan
2002                        | BinaryOperator::GreaterThan
2003                        | BinaryOperator::LessEqual
2004                        | BinaryOperator::GreaterEqual
2005                ) {
2006                    operator = Some(*op);
2007                    break;
2008                }
2009            }
2010        }
2011        // If no comparisons, find first additive operator (+, -)
2012        if operator.is_none() {
2013            for (_, op) in &candidates {
2014                if matches!(op, BinaryOperator::Add | BinaryOperator::Subtract) {
2015                    operator = Some(*op);
2016                    break;
2017                }
2018            }
2019        }
2020        // If no additive, take first multiplicative operator (*, /, %)
2021        if operator.is_none() {
2022            operator = Some(candidates[0].1);
2023        }
2024    }
2025
2026    unsafe {
2027        clang_disposeTokens(tu, tokens, num_tokens);
2028    }
2029
2030    operator
2031}
2032
2033/// Extract a function call expression.
2034fn extract_function_call(cursor: CXCursor) -> Option<Expression> {
2035    // Get function name
2036    let name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
2037    let function = unsafe {
2038        let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
2039        let name = c_str.to_string_lossy().into_owned();
2040        clang_disposeString(name_cxstring);
2041        name
2042    };
2043
2044    // Extract arguments by visiting children
2045    // We use a struct to track if we've seen the function reference yet
2046    #[repr(C)]
2047    struct ArgData {
2048        arguments: Vec<Expression>,
2049        skip_first_declref: bool,
2050    }
2051
2052    let mut arg_data = ArgData {
2053        arguments: Vec::new(),
2054        skip_first_declref: true, // Skip the first DeclRefExpr (function name)
2055    };
2056    let args_ptr = &mut arg_data as *mut ArgData;
2057
2058    unsafe {
2059        clang_visitChildren(cursor, visit_call_argument, args_ptr as CXClientData);
2060    }
2061
2062    Some(Expression::FunctionCall {
2063        function,
2064        arguments: arg_data.arguments,
2065    })
2066}
2067
2068/// Visitor callback for function call arguments.
2069///
2070/// # Safety
2071///
2072/// This function is called by clang_visitChildren and must follow C calling conventions.
2073#[allow(non_upper_case_globals)]
2074extern "C" fn visit_call_argument(
2075    cursor: CXCursor,
2076    _parent: CXCursor,
2077    client_data: CXClientData,
2078) -> CXChildVisitResult {
2079    #[repr(C)]
2080    struct ArgData {
2081        arguments: Vec<Expression>,
2082        skip_first_declref: bool,
2083    }
2084
2085    // SAFETY: Converting client data back to ArgData pointer
2086    let arg_data = unsafe { &mut *(client_data as *mut ArgData) };
2087
2088    // SAFETY: Getting cursor kind
2089    let kind = unsafe { clang_getCursorKind(cursor) };
2090
2091    match kind {
2092        CXCursor_IntegerLiteral => {
2093            if let Some(expr) = extract_int_literal(cursor) {
2094                arg_data.arguments.push(expr);
2095            }
2096            CXChildVisit_Continue
2097        }
2098        CXCursor_StringLiteral => {
2099            if let Some(expr) = extract_string_literal(cursor) {
2100                arg_data.arguments.push(expr);
2101            }
2102            CXChildVisit_Continue
2103        }
2104        CXCursor_DeclRefExpr => {
2105            // Variable reference argument
2106            // The first DeclRefExpr is the function being called, skip it
2107            if arg_data.skip_first_declref {
2108                arg_data.skip_first_declref = false;
2109                CXChildVisit_Continue
2110            } else {
2111                if let Some(expr) = extract_variable_ref(cursor) {
2112                    arg_data.arguments.push(expr);
2113                }
2114                CXChildVisit_Continue
2115            }
2116        }
2117        CXCursor_BinaryOperator => {
2118            // Binary operation in argument (e.g., x + 1, y * 2)
2119            if let Some(expr) = extract_binary_op(cursor) {
2120                arg_data.arguments.push(expr);
2121            }
2122            CXChildVisit_Continue
2123        }
2124        CXCursor_CallExpr => {
2125            // Nested function call (e.g., add(add(x, 5), add(10, 20)))
2126            if let Some(expr) = extract_function_call(cursor) {
2127                arg_data.arguments.push(expr);
2128            }
2129            CXChildVisit_Continue
2130        }
2131        CXCursor_UnaryOperator => {
2132            // Unary operation in argument (e.g., -x, !flag)
2133            if let Some(expr) = extract_unary_op(cursor) {
2134                arg_data.arguments.push(expr);
2135            }
2136            CXChildVisit_Continue
2137        }
2138        CXCursor_ArraySubscriptExpr => {
2139            // Array indexing in argument (e.g., arr[i])
2140            if let Some(expr) = extract_array_index(cursor) {
2141                arg_data.arguments.push(expr);
2142            }
2143            CXChildVisit_Continue
2144        }
2145        CXCursor_MemberRefExpr => {
2146            // Field access in argument (e.g., ptr->field or obj.field)
2147            if let Some(expr) = extract_field_access(cursor) {
2148                arg_data.arguments.push(expr);
2149            }
2150            CXChildVisit_Continue
2151        }
2152        CXCursor_UnexposedExpr | CXCursor_ParenExpr => {
2153            // Unexposed expressions might wrap actual expressions or be sizeof, try to extract
2154            if let Some(expr) = extract_sizeof(cursor) {
2155                arg_data.arguments.push(expr);
2156                CXChildVisit_Continue
2157            } else {
2158                CXChildVisit_Recurse
2159            }
2160        }
2161        136 => {
2162            // CXCursor_UnaryExpr - includes sizeof, alignof, etc.
2163            if let Some(expr) = extract_sizeof(cursor) {
2164                arg_data.arguments.push(expr);
2165                CXChildVisit_Continue
2166            } else {
2167                CXChildVisit_Recurse
2168            }
2169        }
2170        _ => CXChildVisit_Continue, // Skip other unknown children
2171    }
2172}
2173
2174/// Extract a unary operator expression.
2175fn extract_unary_op(cursor: CXCursor) -> Option<Expression> {
2176    // Get the translation unit
2177    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
2178    if tu.is_null() {
2179        return None;
2180    }
2181
2182    // Get the extent (source range) of the cursor
2183    let extent = unsafe { clang_getCursorExtent(cursor) };
2184
2185    // Tokenize to find the operator
2186    let mut tokens = ptr::null_mut();
2187    let mut num_tokens = 0;
2188
2189    unsafe {
2190        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
2191    }
2192
2193    let mut operator: Option<UnaryOperator> = None;
2194    let mut is_dereference = false;
2195    let mut is_increment = false;
2196    let mut is_decrement = false;
2197    let mut operator_position = 0;
2198
2199    // Look through tokens to find the unary operator
2200    for i in 0..num_tokens {
2201        unsafe {
2202            let token = *tokens.add(i as usize);
2203            let token_kind = clang_getTokenKind(token);
2204
2205            if token_kind == CXToken_Punctuation {
2206                let token_cxstring = clang_getTokenSpelling(tu, token);
2207                let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
2208                if let Ok(token_str) = c_str.to_str() {
2209                    match token_str {
2210                        "*" => {
2211                            is_dereference = true;
2212                            clang_disposeString(token_cxstring);
2213                            break;
2214                        }
2215                        "-" => {
2216                            operator = Some(UnaryOperator::Minus);
2217                            clang_disposeString(token_cxstring);
2218                            break;
2219                        }
2220                        "!" => {
2221                            operator = Some(UnaryOperator::LogicalNot);
2222                            clang_disposeString(token_cxstring);
2223                            break;
2224                        }
2225                        "~" => {
2226                            operator = Some(UnaryOperator::BitwiseNot);
2227                            clang_disposeString(token_cxstring);
2228                            break;
2229                        }
2230                        "&" => {
2231                            operator = Some(UnaryOperator::AddressOf);
2232                            clang_disposeString(token_cxstring);
2233                            break;
2234                        }
2235                        "++" => {
2236                            is_increment = true;
2237                            operator_position = i;
2238                            clang_disposeString(token_cxstring);
2239                            break;
2240                        }
2241                        "--" => {
2242                            is_decrement = true;
2243                            operator_position = i;
2244                            clang_disposeString(token_cxstring);
2245                            break;
2246                        }
2247                        _ => {}
2248                    }
2249                }
2250                clang_disposeString(token_cxstring);
2251            }
2252        }
2253    }
2254
2255    unsafe {
2256        clang_disposeTokens(tu, tokens, num_tokens);
2257    }
2258
2259    // Extract the operand
2260    let mut operand: Option<Expression> = None;
2261    let operand_ptr = &mut operand as *mut Option<Expression>;
2262
2263    unsafe {
2264        clang_visitChildren(cursor, visit_expression, operand_ptr as CXClientData);
2265    }
2266
2267    let operand_expr = operand?;
2268
2269    // Handle dereference separately (maintains backward compatibility)
2270    if is_dereference {
2271        return Some(Expression::Dereference(Box::new(operand_expr)));
2272    }
2273
2274    // Handle increment/decrement operators
2275    if is_increment {
2276        // Check if pre or post increment
2277        let is_pre = operator_position == 0;
2278        if is_pre {
2279            return Some(Expression::PreIncrement {
2280                operand: Box::new(operand_expr),
2281            });
2282        } else {
2283            return Some(Expression::PostIncrement {
2284                operand: Box::new(operand_expr),
2285            });
2286        }
2287    }
2288
2289    if is_decrement {
2290        // Check if pre or post decrement
2291        let is_pre = operator_position == 0;
2292        if is_pre {
2293            return Some(Expression::PreDecrement {
2294                operand: Box::new(operand_expr),
2295            });
2296        } else {
2297            return Some(Expression::PostDecrement {
2298                operand: Box::new(operand_expr),
2299            });
2300        }
2301    }
2302
2303    // Handle other unary operators
2304    if let Some(op) = operator {
2305        return Some(Expression::UnaryOp {
2306            op,
2307            operand: Box::new(operand_expr),
2308        });
2309    }
2310
2311    None
2312}
2313
2314/// Extract an array indexing expression.
2315fn extract_array_index(cursor: CXCursor) -> Option<Expression> {
2316    // Extract array and index expressions by visiting children
2317    let mut operands: Vec<Expression> = Vec::new();
2318    let operands_ptr = &mut operands as *mut Vec<Expression>;
2319
2320    unsafe {
2321        clang_visitChildren(cursor, visit_binary_operand, operands_ptr as CXClientData);
2322    }
2323
2324    // Array subscript should have exactly 2 operands: array and index
2325    if operands.len() != 2 {
2326        return None;
2327    }
2328
2329    Some(Expression::ArrayIndex {
2330        array: Box::new(operands[0].clone()),
2331        index: Box::new(operands[1].clone()),
2332    })
2333}
2334
2335/// Extract a field access expression (obj.field or ptr->field).
2336fn extract_field_access(cursor: CXCursor) -> Option<Expression> {
2337    // Get the field name
2338    let field_name_cxstring = unsafe { clang_getCursorSpelling(cursor) };
2339    let field = unsafe {
2340        let c_str = CStr::from_ptr(clang_getCString(field_name_cxstring));
2341        let name = c_str.to_string_lossy().into_owned();
2342        clang_disposeString(field_name_cxstring);
2343        name
2344    };
2345
2346    // Determine if this is -> or . by tokenizing
2347    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
2348    if tu.is_null() {
2349        return None;
2350    }
2351
2352    let extent = unsafe { clang_getCursorExtent(cursor) };
2353    let mut tokens = ptr::null_mut();
2354    let mut num_tokens = 0;
2355
2356    unsafe {
2357        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
2358    }
2359
2360    let mut is_arrow = false;
2361
2362    // Look through tokens to find the LAST '->' or '.' operator
2363    // (the rightmost operator is the one for this specific MemberRefExpr)
2364    // For nested access like r->bottom_right.x, the extent includes all tokens,
2365    // so we need the last operator, not the first
2366    for i in 0..num_tokens {
2367        unsafe {
2368            let token = *tokens.add(i as usize);
2369            let token_kind = clang_getTokenKind(token);
2370
2371            if token_kind == CXToken_Punctuation {
2372                let token_cxstring = clang_getTokenSpelling(tu, token);
2373                let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
2374                if let Ok(token_str) = c_str.to_str() {
2375                    if token_str == "->" {
2376                        is_arrow = true;
2377                        // Don't break - keep looking for later operators
2378                    } else if token_str == "." {
2379                        is_arrow = false;
2380                        // Don't break - keep looking for later operators
2381                    }
2382                }
2383                clang_disposeString(token_cxstring);
2384            }
2385        }
2386    }
2387
2388    unsafe {
2389        clang_disposeTokens(tu, tokens, num_tokens);
2390    }
2391
2392    // Extract the object/pointer expression by visiting children
2393    let mut object_expr: Option<Expression> = None;
2394    let expr_ptr = &mut object_expr as *mut Option<Expression>;
2395
2396    unsafe {
2397        clang_visitChildren(cursor, visit_expression, expr_ptr as CXClientData);
2398    }
2399
2400    let object = object_expr?;
2401
2402    if is_arrow {
2403        Some(Expression::PointerFieldAccess {
2404            pointer: Box::new(object),
2405            field,
2406        })
2407    } else {
2408        Some(Expression::FieldAccess {
2409            object: Box::new(object),
2410            field,
2411        })
2412    }
2413}
2414
2415/// Extract a sizeof expression.
2416fn extract_sizeof(cursor: CXCursor) -> Option<Expression> {
2417    // Get the translation unit
2418    let tu = unsafe { clang_Cursor_getTranslationUnit(cursor) };
2419    if tu.is_null() {
2420        return None;
2421    }
2422
2423    // Get the extent (source range) of the cursor
2424    let extent = unsafe { clang_getCursorExtent(cursor) };
2425
2426    // Tokenize to find "sizeof" keyword
2427    let mut tokens = ptr::null_mut();
2428    let mut num_tokens = 0;
2429
2430    unsafe {
2431        clang_tokenize(tu, extent, &mut tokens, &mut num_tokens);
2432    }
2433
2434    let mut is_sizeof = false;
2435    let mut type_name = String::new();
2436
2437    // Look through tokens to find "sizeof" keyword and extract type name
2438    for i in 0..num_tokens {
2439        unsafe {
2440            let token = *tokens.add(i as usize);
2441            let token_kind = clang_getTokenKind(token);
2442            let token_cxstring = clang_getTokenSpelling(tu, token);
2443            let c_str = CStr::from_ptr(clang_getCString(token_cxstring));
2444
2445            if let Ok(token_str) = c_str.to_str() {
2446                if token_str == "sizeof" {
2447                    is_sizeof = true;
2448                } else if is_sizeof
2449                    && (token_kind == CXToken_Identifier || token_kind == CXToken_Keyword)
2450                    && token_str != "("
2451                    && token_str != ")"
2452                {
2453                    // This is part of the type name (e.g., "int", "Data", "struct")
2454                    if !type_name.is_empty() {
2455                        type_name.push(' ');
2456                    }
2457                    type_name.push_str(token_str);
2458                }
2459            }
2460
2461            clang_disposeString(token_cxstring);
2462        }
2463    }
2464
2465    unsafe {
2466        clang_disposeTokens(tu, tokens, num_tokens);
2467    }
2468
2469    if is_sizeof && !type_name.is_empty() {
2470        Some(Expression::Sizeof { type_name })
2471    } else {
2472        None
2473    }
2474}
2475
2476/// Convert clang type to our Type enum.
2477#[allow(non_upper_case_globals)]
2478fn convert_type(cx_type: CXType) -> Option<Type> {
2479    // SAFETY: Getting type kind
2480    match cx_type.kind {
2481        CXType_Void => Some(Type::Void),
2482        CXType_Int => Some(Type::Int),
2483        CXType_Float => Some(Type::Float),
2484        CXType_Double => Some(Type::Double),
2485        CXType_Char_S | CXType_Char_U => Some(Type::Char),
2486        CXType_Pointer => {
2487            // SAFETY: Getting pointee type
2488            let pointee = unsafe { clang_getPointeeType(cx_type) };
2489
2490            // Check if the pointee is a function - this is a function pointer
2491            if pointee.kind == CXType_FunctionProto || pointee.kind == CXType_FunctionNoProto {
2492                // This is a function pointer type
2493                // Extract return type
2494                let return_cx_type = unsafe { clang_getResultType(pointee) };
2495                let return_type = convert_type(return_cx_type)?;
2496
2497                // Extract parameter types
2498                let num_args = unsafe { clang_getNumArgTypes(pointee) };
2499                let mut param_types = Vec::new();
2500
2501                for i in 0..num_args {
2502                    let arg_type = unsafe { clang_getArgType(pointee, i as u32) };
2503                    if let Some(param_type) = convert_type(arg_type) {
2504                        param_types.push(param_type);
2505                    }
2506                }
2507
2508                return Some(Type::FunctionPointer {
2509                    param_types,
2510                    return_type: Box::new(return_type),
2511                });
2512            }
2513
2514            // Regular pointer (not function pointer)
2515            convert_type(pointee).map(|t| Type::Pointer(Box::new(t)))
2516        }
2517        CXType_FunctionProto | CXType_FunctionNoProto => {
2518            // Function type (not a pointer to function, but the function type itself)
2519            // This can occur in typedefs like: typedef int Func(int);
2520            // Extract return type
2521            let return_cx_type = unsafe { clang_getResultType(cx_type) };
2522            let return_type = convert_type(return_cx_type)?;
2523
2524            // Extract parameter types
2525            let num_args = unsafe { clang_getNumArgTypes(cx_type) };
2526            let mut param_types = Vec::new();
2527
2528            for i in 0..num_args {
2529                let arg_type = unsafe { clang_getArgType(cx_type, i as u32) };
2530                if let Some(param_type) = convert_type(arg_type) {
2531                    param_types.push(param_type);
2532                }
2533            }
2534
2535            Some(Type::FunctionPointer {
2536                param_types,
2537                return_type: Box::new(return_type),
2538            })
2539        }
2540        CXType_Record => {
2541            // SAFETY: Getting type declaration to extract struct name
2542            let decl = unsafe { clang_getTypeDeclaration(cx_type) };
2543            let name_cxstring = unsafe { clang_getCursorSpelling(decl) };
2544            let name = unsafe {
2545                let c_str = CStr::from_ptr(clang_getCString(name_cxstring));
2546                let struct_name = c_str.to_string_lossy().into_owned();
2547                clang_disposeString(name_cxstring);
2548                struct_name
2549            };
2550            Some(Type::Struct(name))
2551        }
2552        CXType_Elaborated => {
2553            // Elaborated types wrap other types (e.g., "struct Point" wraps the Record type)
2554            // Get the canonical type to unwrap it
2555            let canonical = unsafe { clang_getCanonicalType(cx_type) };
2556            convert_type(canonical)
2557        }
2558        CXType_Typedef => {
2559            // Typedef types wrap the actual underlying type
2560            // Get the canonical type to unwrap it
2561            let canonical = unsafe { clang_getCanonicalType(cx_type) };
2562            convert_type(canonical)
2563        }
2564        _ => None,
2565    }
2566}
2567
/// Represents a single case in a switch statement.
///
/// `value` is the expression of the case, or `None` for the default case;
/// `body` holds the statements to execute for this case.
#[derive(Debug, Clone, PartialEq)]
pub struct SwitchCase {
    /// Case value expression (`None` for the default case).
    pub value: Option<Expression>,
    /// Statements to execute for this case.
    pub body: Vec<Statement>,
}
2576
/// Represents a C statement.
///
/// Each variant models one statement form. Nested bodies are stored as
/// `Vec<Statement>` and sub-expressions as [`Expression`] values.
///
/// Assignments are split by the shape of their left-hand side: a plain
/// variable name (`Assignment`), a dereferenced pointer (`DerefAssignment`),
/// an array element (`ArrayIndexAssignment`) or a struct field
/// (`FieldAssignment`).
#[derive(Debug, Clone, PartialEq)]
pub enum Statement {
    /// Variable declaration: `int* ptr = malloc(4);`
    VariableDeclaration {
        /// Variable name
        name: String,
        /// Variable type
        var_type: Type,
        /// Optional initializer expression (`None` when there is no initializer)
        initializer: Option<Expression>,
    },
    /// Return statement: `return expr;` (the expression is optional)
    Return(Option<Expression>),
    /// Assignment statement: `x = 42;`
    ///
    /// The target is stored as a bare variable name, not as an expression.
    Assignment {
        /// Target variable name
        target: String,
        /// Value expression to assign
        value: Expression,
    },
    /// If statement: `if (cond) { ... } else { ... }`
    If {
        /// Condition expression
        condition: Expression,
        /// Then block
        then_block: Vec<Statement>,
        /// Optional else block (`None` when there is no `else`)
        else_block: Option<Vec<Statement>>,
    },
    /// For loop: `for (init; cond; inc) { ... }`
    ///
    /// All three header clauses are optional, matching C's `for (;;)`.
    For {
        /// Optional init statement
        init: Option<Box<Statement>>,
        /// Optional condition expression
        condition: Option<Expression>,
        /// Optional increment statement
        increment: Option<Box<Statement>>,
        /// Loop body
        body: Vec<Statement>,
    },
    /// While loop: `while (cond) { ... }`
    While {
        /// Condition expression
        condition: Expression,
        /// Loop body
        body: Vec<Statement>,
    },
    /// Pointer dereference assignment: `*ptr = value;`
    DerefAssignment {
        /// Target expression to dereference
        target: Expression,
        /// Value expression to assign
        value: Expression,
    },
    /// Array index assignment: `arr[i] = value;`
    ArrayIndexAssignment {
        /// Array expression
        array: Box<Expression>,
        /// Index expression
        index: Box<Expression>,
        /// Value expression to assign
        value: Expression,
    },
    /// Field assignment: `ptr->field = value;` or `obj.field = value;`
    ///
    /// Both access forms share this variant; the variant itself does not
    /// record whether `->` or `.` was written.
    FieldAssignment {
        /// Object/pointer expression
        object: Expression,
        /// Field name
        field: String,
        /// Value expression to assign
        value: Expression,
    },
    /// Break statement: `break;`
    Break,
    /// Continue statement: `continue;`
    Continue,
    /// Switch statement: `switch (expr) { case 1: ...; default: ...; }`
    Switch {
        /// Condition expression to switch on
        condition: Expression,
        /// List of case statements
        cases: Vec<SwitchCase>,
        /// Optional default case body
        default_case: Option<Vec<Statement>>,
    },
    /// Post-increment statement: `ptr++;`
    PostIncrement {
        /// Target variable name
        target: String,
    },
    /// Pre-increment statement: `++ptr;`
    PreIncrement {
        /// Target variable name
        target: String,
    },
    /// Post-decrement statement: `ptr--;`
    PostDecrement {
        /// Target variable name
        target: String,
    },
    /// Pre-decrement statement: `--ptr;`
    PreDecrement {
        /// Target variable name
        target: String,
    },
    /// Compound assignment: `ptr += offset;`, `x *= 2;`, etc.
    CompoundAssignment {
        /// Target variable name
        target: String,
        /// Binary operator to apply
        // NOTE(review): presumably `+=` is stored as `BinaryOperator::Add`,
        // `*=` as `Multiply`, and so on - confirm against the parser.
        op: BinaryOperator,
        /// Value expression
        value: Expression,
    },
    /// Function call statement: `strlen(s);`, `strcpy(dst, src);`
    ///
    /// Stores the callee name and arguments directly rather than wrapping an
    /// `Expression::FunctionCall`.
    FunctionCall {
        /// Function name
        function: String,
        /// Arguments
        arguments: Vec<Expression>,
    },
}
2700
2701impl Statement {
2702    /// Check if this statement is a string function call.
2703    pub fn is_string_function_call(&self) -> bool {
2704        match self {
2705            Statement::FunctionCall { function, .. } => {
2706                matches!(function.as_str(), "strlen" | "strcmp" | "strcpy" | "strdup")
2707            }
2708            _ => false,
2709        }
2710    }
2711
2712    /// Check if this statement is a function call.
2713    pub fn is_function_call(&self) -> bool {
2714        matches!(self, Statement::FunctionCall { .. })
2715    }
2716
2717    /// Convert this statement to a function call expression if it is one.
2718    ///
2719    /// # Implementation Status
2720    ///
2721    /// Stub implementation - always returns `None`.
2722    /// The `Statement::FunctionCall` variant doesn't store the call as an `Expression`,
2723    /// so conversion would require reconstructing an `Expression::FunctionCall` from
2724    /// the statement's fields.
2725    pub fn as_function_call(&self) -> Option<&Expression> {
2726        None
2727    }
2728}
2729
/// Unary operators for C expressions.
///
/// Used as the `op` of `Expression::UnaryOp`. Pointer dereference and the
/// increment/decrement forms are not listed here; `Expression` has dedicated
/// variants for them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOperator {
    /// Unary minus (-x)
    Minus,
    /// Logical NOT (!x)
    LogicalNot,
    /// Bitwise NOT (~x)
    BitwiseNot,
    /// Address-of (&x)
    AddressOf,
}
2742
/// Binary operators for C expressions.
///
/// Covers arithmetic, comparison and logical operators. Used by
/// `Expression::BinaryOp` and by `Statement::CompoundAssignment`.
/// This enum has no variants for bitwise or shift operators.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOperator {
    /// Addition (+)
    Add,
    /// Subtraction (-)
    Subtract,
    /// Multiplication (*)
    Multiply,
    /// Division (/)
    Divide,
    /// Modulo (%)
    Modulo,
    /// Equality (==)
    Equal,
    /// Inequality (!=)
    NotEqual,
    /// Less than (<)
    LessThan,
    /// Greater than (>)
    GreaterThan,
    /// Less than or equal (<=)
    LessEqual,
    /// Greater than or equal (>=)
    GreaterEqual,
    /// Logical AND (&&)
    LogicalAnd,
    /// Logical OR (||)
    LogicalOr,
}
2773
/// Represents a C expression.
///
/// Expressions form a tree: compound variants own their operands through
/// `Box<Expression>` (or `Vec<Expression>` for call arguments).
#[derive(Debug, Clone, PartialEq)]
pub enum Expression {
    /// Integer literal: `42`
    ///
    /// Stored as `i32`, so only values in that range can be represented.
    IntLiteral(i32),
    /// String literal: `"hello"`
    StringLiteral(String),
    /// Variable reference: `x`
    Variable(String),
    /// Binary operation: `a + b`
    BinaryOp {
        /// Operator
        op: BinaryOperator,
        /// Left operand
        left: Box<Expression>,
        /// Right operand
        right: Box<Expression>,
    },
    /// Function call: `malloc(4)`
    ///
    /// The callee is stored by name, not as an expression.
    FunctionCall {
        /// Function name
        function: String,
        /// Arguments
        arguments: Vec<Expression>,
    },
    /// Pointer dereference: `*ptr`
    Dereference(Box<Expression>),
    /// Unary operation: `-x`, `!x`
    UnaryOp {
        /// Operator
        op: UnaryOperator,
        /// Operand
        operand: Box<Expression>,
    },
    /// Array indexing: `arr[i]`
    ArrayIndex {
        /// Array expression
        array: Box<Expression>,
        /// Index expression
        index: Box<Expression>,
    },
    /// Struct field access: `obj.field`
    FieldAccess {
        /// Object expression
        object: Box<Expression>,
        /// Field name
        field: String,
    },
    /// Pointer field access: `ptr->field`
    PointerFieldAccess {
        /// Pointer expression
        pointer: Box<Expression>,
        /// Field name
        field: String,
    },
    /// Post-increment expression: `ptr++`
    PostIncrement {
        /// Operand expression
        operand: Box<Expression>,
    },
    /// Pre-increment expression: `++ptr`
    PreIncrement {
        /// Operand expression
        operand: Box<Expression>,
    },
    /// Post-decrement expression: `ptr--`
    PostDecrement {
        /// Operand expression
        operand: Box<Expression>,
    },
    /// Pre-decrement expression: `--ptr`
    PreDecrement {
        /// Operand expression
        operand: Box<Expression>,
    },
    /// Sizeof expression: `sizeof(int)` or `sizeof(struct Data)`
    ///
    /// The operand is kept as text rather than as a `Type` value.
    Sizeof {
        /// Type name as a string (e.g., "int", "struct Data")
        type_name: String,
    },
}
2855
2856impl Expression {
2857    /// Check if this expression is a string function call (strlen, strcmp, strcpy, strdup).
2858    pub fn is_string_function_call(&self) -> bool {
2859        match self {
2860            Expression::FunctionCall { function, .. } => {
2861                matches!(function.as_str(), "strlen" | "strcmp" | "strcpy" | "strdup")
2862            }
2863            _ => false,
2864        }
2865    }
2866
2867    /// Get the string function name if this is a string function call.
2868    pub fn string_function_name(&self) -> Option<&str> {
2869        match self {
2870            Expression::FunctionCall { function, .. } if self.is_string_function_call() => {
2871                Some(function.as_str())
2872            }
2873            _ => None,
2874        }
2875    }
2876
2877    /// Check if this expression has a string literal argument.
2878    pub fn has_string_literal_argument(&self) -> bool {
2879        match self {
2880            Expression::FunctionCall { arguments, .. } => arguments
2881                .iter()
2882                .any(|arg| matches!(arg, Expression::StringLiteral(_))),
2883            _ => false,
2884        }
2885    }
2886}
2887
2888/// Represents a C typedef declaration.
2889#[derive(Debug, Clone, PartialEq)]
2890pub struct Typedef {
2891    /// Typedef name (the alias)
2892    pub name: String,
2893    /// Underlying type being aliased
2894    pub underlying_type: Type,
2895}
2896
2897impl Typedef {
2898    /// Create a new typedef.
2899    pub fn new(name: String, underlying_type: Type) -> Self {
2900        Self {
2901            name,
2902            underlying_type,
2903        }
2904    }
2905
2906    /// Get the typedef name.
2907    pub fn name(&self) -> &str {
2908        &self.name
2909    }
2910
2911    /// Get the underlying type as a string representation.
2912    pub fn underlying_type(&self) -> &str {
2913        // Return a string representation of the type
2914        match &self.underlying_type {
2915            Type::Void => "void",
2916            Type::Int => "int",
2917            Type::Float => "float",
2918            Type::Double => "double",
2919            Type::Char => "char",
2920            Type::Pointer(inner) => match **inner {
2921                Type::Char => "char*",
2922                Type::Int => "int*",
2923                Type::Float => "float*",
2924                Type::Double => "double*",
2925                Type::Void => "void*",
2926                _ => "pointer",
2927            },
2928            Type::Struct(name) => name,
2929            Type::FunctionPointer { .. } => "function pointer",
2930        }
2931    }
2932
2933    /// Check if this typedef is a pointer type.
2934    pub fn is_pointer(&self) -> bool {
2935        matches!(self.underlying_type, Type::Pointer(_))
2936    }
2937
2938    /// Check if this typedef is a struct type.
2939    pub fn is_struct(&self) -> bool {
2940        matches!(self.underlying_type, Type::Struct(_))
2941    }
2942
2943    /// Check if this typedef is a function pointer type.
2944    pub fn is_function_pointer(&self) -> bool {
2945        matches!(self.underlying_type, Type::FunctionPointer { .. })
2946    }
2947
2948    /// Check if this typedef is an array type.
2949    pub fn is_array(&self) -> bool {
2950        // Arrays are not yet in the Type enum, so return false for now
2951        false
2952    }
2953}
2954
2955/// Represents a struct field.
2956#[derive(Debug, Clone, PartialEq)]
2957pub struct StructField {
2958    /// Field name
2959    pub name: String,
2960    /// Field type
2961    pub field_type: Type,
2962}
2963
2964impl StructField {
2965    /// Create a new struct field.
2966    pub fn new(name: String, field_type: Type) -> Self {
2967        Self { name, field_type }
2968    }
2969
2970    /// Get the field name.
2971    pub fn name(&self) -> &str {
2972        &self.name
2973    }
2974
2975    /// Check if this field is a function pointer.
2976    pub fn is_function_pointer(&self) -> bool {
2977        matches!(self.field_type, Type::FunctionPointer { .. })
2978    }
2979}
2980
2981/// Represents a struct definition.
2982#[derive(Debug, Clone, PartialEq)]
2983pub struct Struct {
2984    /// Struct name
2985    pub name: String,
2986    /// Struct fields
2987    pub fields: Vec<StructField>,
2988}
2989
2990impl Struct {
2991    /// Create a new struct.
2992    pub fn new(name: String, fields: Vec<StructField>) -> Self {
2993        Self { name, fields }
2994    }
2995
2996    /// Get the struct name.
2997    pub fn name(&self) -> &str {
2998        &self.name
2999    }
3000
3001    /// Get the struct fields.
3002    pub fn fields(&self) -> &[StructField] {
3003        &self.fields
3004    }
3005}
3006
3007/// Represents a variable declaration.
3008#[derive(Debug, Clone, PartialEq)]
3009pub struct Variable {
3010    /// Variable name
3011    name: String,
3012    /// Variable type
3013    var_type: Type,
3014    /// Optional initializer expression
3015    initializer: Option<Expression>,
3016}
3017
3018impl Variable {
3019    /// Create a new variable.
3020    pub fn new(name: String, var_type: Type) -> Self {
3021        Self {
3022            name,
3023            var_type,
3024            initializer: None,
3025        }
3026    }
3027
3028    /// Create a new variable with an initializer.
3029    pub fn new_with_initializer(name: String, var_type: Type, initializer: Expression) -> Self {
3030        Self {
3031            name,
3032            var_type,
3033            initializer: Some(initializer),
3034        }
3035    }
3036
3037    /// Get the variable name.
3038    pub fn name(&self) -> &str {
3039        &self.name
3040    }
3041
3042    /// Get the variable type.
3043    pub fn var_type(&self) -> &Type {
3044        &self.var_type
3045    }
3046
3047    /// Check if this variable is a function pointer.
3048    pub fn is_function_pointer(&self) -> bool {
3049        matches!(self.var_type, Type::FunctionPointer { .. })
3050    }
3051
3052    /// Get the number of parameters if this is a function pointer.
3053    pub fn function_pointer_param_count(&self) -> usize {
3054        match &self.var_type {
3055            Type::FunctionPointer { param_types, .. } => param_types.len(),
3056            _ => 0,
3057        }
3058    }
3059
3060    /// Check if this function pointer has a void return type.
3061    pub fn function_pointer_has_void_return(&self) -> bool {
3062        match &self.var_type {
3063            Type::FunctionPointer { return_type, .. } => matches!(**return_type, Type::Void),
3064            _ => false,
3065        }
3066    }
3067
3068    /// Check if this variable is a string literal (char* with literal initializer).
3069    ///
3070    /// Detects patterns like: `const char* msg = "Hello";`
3071    ///
3072    /// # Implementation
3073    ///
3074    /// Checks if:
3075    /// - Type is a pointer to char (`char*`)
3076    /// - Has an initializer that is a `StringLiteral` expression
3077    ///
3078    /// Note: Const qualifier detection not yet implemented - checks all char* pointers.
3079    pub fn is_string_literal(&self) -> bool {
3080        // Check if type is char*
3081        let is_char_ptr =
3082            matches!(self.var_type, Type::Pointer(ref inner) if **inner == Type::Char);
3083
3084        // Check if initializer is a string literal
3085        if let Some(initializer) = &self.initializer {
3086            is_char_ptr && matches!(initializer, Expression::StringLiteral(_))
3087        } else {
3088            false
3089        }
3090    }
3091
3092    /// Check if this variable is a string buffer (char* allocated with malloc).
3093    ///
3094    /// Detects patterns like: `char* buffer = malloc(100);`
3095    ///
3096    /// # Implementation
3097    ///
3098    /// Checks if:
3099    /// - Type is a pointer to char (`char*`)
3100    /// - Has an initializer that is a malloc/calloc function call
3101    pub fn is_string_buffer(&self) -> bool {
3102        // Check if type is char*
3103        let is_char_ptr =
3104            matches!(self.var_type, Type::Pointer(ref inner) if **inner == Type::Char);
3105
3106        // Check if initializer is malloc/calloc call
3107        if let Some(Expression::FunctionCall { function, .. }) = &self.initializer {
3108            is_char_ptr && (function == "malloc" || function == "calloc")
3109        } else {
3110            false
3111        }
3112    }
3113
3114    /// Get the initializer expression for this variable.
3115    ///
3116    /// Returns `Some(&Expression)` if the variable has an initializer, `None` otherwise.
3117    pub fn initializer(&self) -> Option<&Expression> {
3118        self.initializer.as_ref()
3119    }
3120}
3121
/// Abstract Syntax Tree representing parsed C code.
///
/// Holds one list per kind of top-level item. The fields are private; read
/// them through the accessor methods on `Ast` and extend them through the
/// matching `add_*` methods.
#[derive(Debug, Clone, PartialEq)]
pub struct Ast {
    // Functions added via `add_function`.
    functions: Vec<Function>,
    // Typedef declarations added via `add_typedef`.
    typedefs: Vec<Typedef>,
    // Struct definitions added via `add_struct`.
    structs: Vec<Struct>,
    // Macro definitions (`#define`) added via `add_macro`.
    macros: Vec<MacroDefinition>,
    // Variables added via `add_variable`.
    variables: Vec<Variable>,
}
3131
/// A C preprocessor macro introduced by `#define`.
///
/// Macros exist in two flavours:
/// - **Object-like**: plain text replacement (e.g., `#define MAX 100`)
/// - **Function-like**: text replacement with parameters (e.g., `#define SQR(x) ((x) * (x))`)
///
/// The flavour is derived purely from `parameters`: a macro with no recorded
/// parameters is treated as object-like.
///
/// # Examples
///
/// ```no_run
/// use decy_parser::parser::{CParser, MacroDefinition};
///
/// // Parse a simple object-like macro
/// let parser = CParser::new()?;
/// let ast = parser.parse("#define MAX 100\nint main() { return 0; }")?;
/// assert_eq!(ast.macros().len(), 1);
/// assert_eq!(ast.macros()[0].name(), "MAX");
/// assert!(ast.macros()[0].is_object_like());
///
/// // Parse a function-like macro
/// let ast2 = parser.parse("#define SQR(x) ((x) * (x))\nint main() { return 0; }")?;
/// assert_eq!(ast2.macros()[0].name(), "SQR");
/// assert!(ast2.macros()[0].is_function_like());
/// assert_eq!(ast2.macros()[0].parameters(), &["x"]);
/// # Ok::<(), anyhow::Error>(())
/// ```
///
/// # Reference
///
/// K&R §4.11, ISO C99 §6.10.3
#[derive(Debug, Clone, PartialEq)]
pub struct MacroDefinition {
    /// Name of the macro
    pub name: String,
    /// Parameter names (empty for object-like macros)
    pub parameters: Vec<String>,
    /// Replacement text (unparsed, tokenized without spaces)
    pub body: String,
}

impl MacroDefinition {
    /// Build an object-like macro, i.e. one with an empty parameter list.
    pub fn new_object_like(name: String, body: String) -> Self {
        Self::new_function_like(name, Vec::new(), body)
    }

    /// Build a function-like macro from its name, parameter names and body.
    pub fn new_function_like(name: String, parameters: Vec<String>, body: String) -> Self {
        MacroDefinition {
            name,
            parameters,
            body,
        }
    }

    /// Borrow the macro name.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Borrow the parameter names as a slice.
    pub fn parameters(&self) -> &[String] {
        self.parameters.as_slice()
    }

    /// Borrow the macro body.
    pub fn body(&self) -> &str {
        self.body.as_str()
    }

    /// Report whether this macro is function-like.
    ///
    /// True exactly when at least one parameter is recorded, so a macro built
    /// with an empty parameter list is reported as object-like.
    pub fn is_function_like(&self) -> bool {
        !self.is_object_like()
    }

    /// Report whether this macro is object-like, i.e. has no recorded parameters.
    pub fn is_object_like(&self) -> bool {
        self.parameters.is_empty()
    }
}
3215
3216impl Ast {
3217    /// Create a new empty AST.
3218    pub fn new() -> Self {
3219        Self {
3220            functions: Vec::new(),
3221            typedefs: Vec::new(),
3222            structs: Vec::new(),
3223            macros: Vec::new(),
3224            variables: Vec::new(),
3225        }
3226    }
3227
3228    /// Get the functions in the AST.
3229    pub fn functions(&self) -> &[Function] {
3230        &self.functions
3231    }
3232
3233    /// Add a function to the AST.
3234    pub fn add_function(&mut self, function: Function) {
3235        self.functions.push(function);
3236    }
3237
3238    /// Get the typedefs in the AST.
3239    pub fn typedefs(&self) -> &[Typedef] {
3240        &self.typedefs
3241    }
3242
3243    /// Add a typedef to the AST.
3244    pub fn add_typedef(&mut self, typedef: Typedef) {
3245        self.typedefs.push(typedef);
3246    }
3247
3248    /// Get the structs in the AST.
3249    pub fn structs(&self) -> &[Struct] {
3250        &self.structs
3251    }
3252
3253    /// Add a struct to the AST.
3254    pub fn add_struct(&mut self, struct_def: Struct) {
3255        self.structs.push(struct_def);
3256    }
3257
3258    /// Get the macro definitions in the AST.
3259    pub fn macros(&self) -> &[MacroDefinition] {
3260        &self.macros
3261    }
3262
3263    /// Add a macro definition to the AST.
3264    pub fn add_macro(&mut self, macro_def: MacroDefinition) {
3265        self.macros.push(macro_def);
3266    }
3267
3268    /// Get the variables in the AST.
3269    pub fn variables(&self) -> &[Variable] {
3270        &self.variables
3271    }
3272
3273    /// Add a variable to the AST.
3274    pub fn add_variable(&mut self, variable: Variable) {
3275        self.variables.push(variable);
3276    }
3277}
3278
3279impl Default for Ast {
3280    fn default() -> Self {
3281        Self::new()
3282    }
3283}
3284
3285/// Represents a C function.
3286#[derive(Debug, Clone, PartialEq)]
3287pub struct Function {
3288    /// Function name
3289    pub name: String,
3290    /// Return type
3291    pub return_type: Type,
3292    /// Parameters
3293    pub parameters: Vec<Parameter>,
3294    /// Function body (statements)
3295    pub body: Vec<Statement>,
3296}
3297
3298impl Function {
3299    /// Create a new function.
3300    pub fn new(name: String, return_type: Type, parameters: Vec<Parameter>) -> Self {
3301        Self {
3302            name,
3303            return_type,
3304            parameters,
3305            body: Vec::new(),
3306        }
3307    }
3308
3309    /// Create a new function with body.
3310    pub fn new_with_body(
3311        name: String,
3312        return_type: Type,
3313        parameters: Vec<Parameter>,
3314        body: Vec<Statement>,
3315    ) -> Self {
3316        Self {
3317            name,
3318            return_type,
3319            parameters,
3320            body,
3321        }
3322    }
3323}
3324
/// Represents a C type.
///
/// Only the forms listed below are modelled. There is no variant for array
/// types, and qualifiers such as `const` are not recorded.
#[derive(Debug, Clone, PartialEq)]
pub enum Type {
    /// void
    Void,
    /// int
    Int,
    /// float
    Float,
    /// double
    Double,
    /// char
    Char,
    /// Pointer to a type (the pointee is boxed because the enum is recursive)
    Pointer(Box<Type>),
    /// Struct type (e.g., struct Point)
    Struct(String),
    /// Function pointer type (e.g., int (*callback)(int))
    FunctionPointer {
        /// Parameter types
        param_types: Vec<Type>,
        /// Return type
        return_type: Box<Type>,
    },
}
3350
3351/// Represents a function parameter.
3352#[derive(Debug, Clone, PartialEq)]
3353pub struct Parameter {
3354    /// Parameter name
3355    pub name: String,
3356    /// Parameter type
3357    pub param_type: Type,
3358}
3359
3360impl Parameter {
3361    /// Create a new parameter.
3362    pub fn new(name: String, param_type: Type) -> Self {
3363        Self { name, param_type }
3364    }
3365
3366    /// Check if this parameter is a function pointer.
3367    pub fn is_function_pointer(&self) -> bool {
3368        matches!(self.param_type, Type::FunctionPointer { .. })
3369    }
3370
3371    /// Check if this parameter is a const char pointer (const char*).
3372    ///
3373    /// # Implementation Status
3374    ///
3375    /// Partial implementation - detects `char*` pointers but doesn't check const qualifier.
3376    /// Returns `true` for any `Pointer(Char)` type.
3377    /// Full implementation requires adding const tracking to the `Type` enum.
3378    pub fn is_const_char_pointer(&self) -> bool {
3379        matches!(self.param_type, Type::Pointer(ref inner) if matches!(**inner, Type::Char))
3380    }
3381}
3382
// Test modules. Each is compiled only for test builds (`#[cfg(test)]`) and is
// loaded from the file named by its `#[path]` attribute.
#[cfg(test)]
#[path = "parser_tests.rs"]
mod parser_tests;

#[cfg(test)]
#[path = "pointer_arithmetic_tests.rs"]
mod pointer_arithmetic_tests;

#[cfg(test)]
#[path = "break_continue_tests.rs"]
mod break_continue_tests;