codegraph_c/pipeline/
macros.rs

1//! Kernel macro expansion and neutralization
2//!
3//! Handles common Linux kernel macros that cause parsing errors by either:
4//! 1. Expanding them to valid C syntax
5//! 2. Replacing them with neutral stubs
6//! 3. Stripping them entirely when safe
7
8// Allow dead code for regex patterns that are defined for future use
9#![allow(dead_code)]
10
11use regex::Regex;
12use std::sync::LazyLock;
13
/// Macro neutralizer for kernel code.
///
/// Carries a running `MacroStats` tally: the rewriting methods add to it each
/// time they run, and `stats()` exposes it read-only.
pub struct MacroNeutralizer {
    /// Statistics for tracking transformations
    stats: MacroStats,
}
19
/// Statistics about macro transformations.
///
/// Every field is a running count that the neutralizer adds to; `Default`
/// starts all of them at zero. `PartialEq`/`Eq` allow whole snapshots to be
/// compared directly (e.g. before/after a run, or in tests).
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct MacroStats {
    /// `likely(` / `unlikely(` wrappers found and stripped.
    pub likely_unlikely_stripped: usize,
    /// `container_of` expansions (presumably maintained by the container_of
    /// phase, which is outside this part of the file).
    pub container_of_expanded: usize,
    /// `for_each_*` iterator macro rewrites (presumably maintained by the
    /// iterator phase, which is outside this part of the file).
    pub for_each_expanded: usize,
    /// `list_for_each_*` iterator macro rewrites (presumably maintained by the
    /// iterator phase, which is outside this part of the file).
    pub list_for_each_expanded: usize,
    /// `BUILD_BUG_ON(` and `BUILD_BUG_ON_MSG(` occurrences seen.
    pub build_bug_on_stripped: usize,
    /// `WARN_ON(`, `WARN_ON_ONCE(` and `BUG_ON(` occurrences seen.
    pub warn_on_stripped: usize,
    /// RCU helper occurrences seen (dereference, assign, lock/unlock,
    /// synchronize).
    pub rcu_simplified: usize,
    /// `typeof` / `__typeof__` / `__typeof` uses that were replaced.
    pub typeof_replaced: usize,
    /// `DEFINE_*` / `DECLARE_*` / `LIST_HEAD`-style macros turned into plain
    /// declarations.
    pub define_macros_stubbed: usize,
    /// GNU statement expressions `({ ... })` reduced to `( ... )`.
    pub statement_expressions_simplified: usize,
}
34
35// Regex patterns for macro detection
36
37// likely(x) / unlikely(x) -> (x)
38static RE_LIKELY: LazyLock<Regex> =
39    LazyLock::new(|| Regex::new(r"\b(likely|unlikely)\s*\(").unwrap());
40
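// BUILD_BUG_ON(...) -> ((void)0)
// The regex only matches the macro name and its opening paren; the matching
// closing paren is located afterwards by balanced-paren scanning in
// `replace_macro_with_void`, so the argument list may contain nested parens.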
43static RE_BUILD_BUG_ON: LazyLock<Regex> =
44    LazyLock::new(|| Regex::new(r"\bBUILD_BUG_ON\s*\(").unwrap());
45
46// BUILD_BUG_ON_MSG(...) -> ((void)0)
47static RE_BUILD_BUG_ON_MSG: LazyLock<Regex> =
48    LazyLock::new(|| Regex::new(r"\bBUILD_BUG_ON_MSG\s*\(").unwrap());
49
50// WARN_ON(x) -> (x)
51static RE_WARN_ON: LazyLock<Regex> =
52    LazyLock::new(|| Regex::new(r"\bWARN_ON(?:_ONCE)?\s*\(").unwrap());
53
54// BUG_ON(x) -> ((void)(x))
55static RE_BUG_ON: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bBUG_ON\s*\(").unwrap());
56
// typeof(x) -> `__auto_type /* typeof(x) */`, or `void *` when it is the whole
// type of a cast such as `(typeof(x))y`
58static RE_TYPEOF: LazyLock<Regex> =
59    LazyLock::new(|| Regex::new(r"\b(typeof|__typeof__|__typeof)\s*\(").unwrap());
60
61// DEFINE_MUTEX(name) -> struct mutex name
62static RE_DEFINE_MUTEX: LazyLock<Regex> =
63    LazyLock::new(|| Regex::new(r"\bDEFINE_MUTEX\s*\(\s*(\w+)\s*\)").unwrap());
64
65// DEFINE_SPINLOCK(name) -> spinlock_t name
66static RE_DEFINE_SPINLOCK: LazyLock<Regex> =
67    LazyLock::new(|| Regex::new(r"\bDEFINE_SPINLOCK\s*\(\s*(\w+)\s*\)").unwrap());
68
69// DEFINE_RWLOCK(name) -> rwlock_t name
70static RE_DEFINE_RWLOCK: LazyLock<Regex> =
71    LazyLock::new(|| Regex::new(r"\bDEFINE_RWLOCK\s*\(\s*(\w+)\s*\)").unwrap());
72
73// DEFINE_SEMAPHORE(name) -> struct semaphore name
74static RE_DEFINE_SEMAPHORE: LazyLock<Regex> =
75    LazyLock::new(|| Regex::new(r"\bDEFINE_SEMAPHORE\s*\(\s*(\w+)\s*\)").unwrap());
76
77// DEFINE_IDA(name) -> struct ida name
78static RE_DEFINE_IDA: LazyLock<Regex> =
79    LazyLock::new(|| Regex::new(r"\bDEFINE_IDA\s*\(\s*(\w+)\s*\)").unwrap());
80
81// DEFINE_IDR(name) -> struct idr name
82static RE_DEFINE_IDR: LazyLock<Regex> =
83    LazyLock::new(|| Regex::new(r"\bDEFINE_IDR\s*\(\s*(\w+)\s*\)").unwrap());
84
// DECLARE_BITMAP(name, bits) -> unsigned long name[1]
// (parse-only stub: the real array length depends on `bits`, which is dropped)
86static RE_DECLARE_BITMAP: LazyLock<Regex> =
87    LazyLock::new(|| Regex::new(r"\bDECLARE_BITMAP\s*\(\s*(\w+)\s*,\s*(\w+)\s*\)").unwrap());
88
89// DECLARE_WAIT_QUEUE_HEAD(name) -> wait_queue_head_t name
90static RE_DECLARE_WAIT_QUEUE: LazyLock<Regex> =
91    LazyLock::new(|| Regex::new(r"\bDECLARE_WAIT_QUEUE_HEAD\s*\(\s*(\w+)\s*\)").unwrap());
92
93// LIST_HEAD(name) -> struct list_head name
94static RE_LIST_HEAD: LazyLock<Regex> =
95    LazyLock::new(|| Regex::new(r"\bLIST_HEAD\s*\(\s*(\w+)\s*\)").unwrap());
96
97// HLIST_HEAD(name) -> struct hlist_head name
98static RE_HLIST_HEAD: LazyLock<Regex> =
99    LazyLock::new(|| Regex::new(r"\bHLIST_HEAD\s*\(\s*(\w+)\s*\)").unwrap());
100
// Statement expression ({ ... }) - matches only forms whose body contains no
// braces, i.e. the innermost statement expressions
102static RE_STMT_EXPR: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\(\{[^{}]*\}\)").unwrap());
103
104// rcu_dereference(p) -> (p)
105static RE_RCU_DEREF: LazyLock<Regex> =
106    LazyLock::new(|| Regex::new(r"\brcu_dereference(?:_protected|_raw|_check)?\s*\(").unwrap());
107
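// rcu_assign_pointer(p, v) -> ((p) = (v)) is the intended expansion, but
// `simplify_rcu` currently only counts these calls and leaves them untouched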
109static RE_RCU_ASSIGN: LazyLock<Regex> =
110    LazyLock::new(|| Regex::new(r"\brcu_assign_pointer\s*\(").unwrap());
111
112// rcu_read_lock() / rcu_read_unlock() -> empty statement
113static RE_RCU_READ_LOCK: LazyLock<Regex> =
114    LazyLock::new(|| Regex::new(r"\brcu_read_lock\s*\(\s*\)").unwrap());
115
116static RE_RCU_READ_UNLOCK: LazyLock<Regex> =
117    LazyLock::new(|| Regex::new(r"\brcu_read_unlock\s*\(\s*\)").unwrap());
118
119// synchronize_rcu() -> empty
120static RE_SYNCHRONIZE_RCU: LazyLock<Regex> =
121    LazyLock::new(|| Regex::new(r"\bsynchronize_rcu\s*\(\s*\)").unwrap());
122
123// READ_ONCE(x) / WRITE_ONCE(x, v) -> simplified
124static RE_READ_ONCE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bREAD_ONCE\s*\(").unwrap());
125
126static RE_WRITE_ONCE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bWRITE_ONCE\s*\(").unwrap());
127
128// ACCESS_ONCE(x) -> (x)
129static RE_ACCESS_ONCE: LazyLock<Regex> =
130    LazyLock::new(|| Regex::new(r"\bACCESS_ONCE\s*\(").unwrap());
131
132// smp_load_acquire / smp_store_release
133static RE_SMP_LOAD: LazyLock<Regex> =
134    LazyLock::new(|| Regex::new(r"\bsmp_load_acquire\s*\(").unwrap());
135
136static RE_SMP_STORE: LazyLock<Regex> =
137    LazyLock::new(|| Regex::new(r"\bsmp_store_release\s*\(").unwrap());
138
139// ARRAY_SIZE(arr) -> (sizeof(arr)/sizeof((arr)[0]))
140static RE_ARRAY_SIZE: LazyLock<Regex> =
141    LazyLock::new(|| Regex::new(r"\bARRAY_SIZE\s*\(\s*(\w+)\s*\)").unwrap());
142
143// sizeof_field(type, member) -> sizeof(((type*)0)->member)
144static RE_SIZEOF_FIELD: LazyLock<Regex> =
145    LazyLock::new(|| Regex::new(r"\bsizeof_field\s*\(").unwrap());
146
147// BIT(n) -> (1UL << (n))
148static RE_BIT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bBIT\s*\(").unwrap());
149
150// BIT_ULL(n) -> (1ULL << (n))
151static RE_BIT_ULL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bBIT_ULL\s*\(").unwrap());
152
153// GENMASK(h, l) -> (((~0UL) >> (BITS_PER_LONG - 1 - (h))) & ((~0UL) << (l)))
154static RE_GENMASK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bGENMASK\s*\(").unwrap());
155
156// GENMASK_ULL(h, l)
157static RE_GENMASK_ULL: LazyLock<Regex> =
158    LazyLock::new(|| Regex::new(r"\bGENMASK_ULL\s*\(").unwrap());
159
160// FIELD_PREP(mask, val) -> (((val) << __bf_shf(mask)) & (mask))
161static RE_FIELD_PREP: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bFIELD_PREP\s*\(").unwrap());
162
163// FIELD_GET(mask, val) -> (((val) & (mask)) >> __bf_shf(mask))
164static RE_FIELD_GET: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bFIELD_GET\s*\(").unwrap());
165
166// IS_ENABLED(CONFIG_...) -> (0) or (1) - we'll use 0 for safety
167static RE_IS_ENABLED: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bIS_ENABLED\s*\(").unwrap());
168
169// Token concatenation ## in macro bodies - often in multi-line macros
170static RE_TOKEN_CONCAT: LazyLock<Regex> =
171    LazyLock::new(|| Regex::new(r"(\w+)\s*##\s*(\w+)").unwrap());
172
173// Macro continuation lines (backslash at end of line)
174static RE_MACRO_CONTINUATION: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\\\s*\n").unwrap());
175
176// #define macros - these should be removed or commented out
177// Match #define until end of logical line (handling continuations)
178static RE_DEFINE_DIRECTIVE: LazyLock<Regex> = LazyLock::new(|| {
179    // Match #define and everything until end of line (including continuations)
180    Regex::new(r"^\s*#\s*define\s+\w+(?:\([^)]*\))?\s*(?:\\[\s]*\n[^\n]*)*[^\n]*").unwrap()
181});
182
183// min/max macros
184static RE_MIN_T: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bmin_t\s*\(\s*\w+\s*,").unwrap());
185
186static RE_MAX_T: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bmax_t\s*\(\s*\w+\s*,").unwrap());
187
188// clamp macros
189static RE_CLAMP: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bclamp\s*\(").unwrap());
190
191// offsetof - usually OK but sometimes causes issues
192static RE_OFFSETOF: LazyLock<Regex> =
193    LazyLock::new(|| Regex::new(r"\boffsetof\s*\(\s*(\w+)\s*,\s*(\w+)\s*\)").unwrap());
194
195// IS_ERR/PTR_ERR/ERR_PTR
196static RE_IS_ERR: LazyLock<Regex> =
197    LazyLock::new(|| Regex::new(r"\bIS_ERR(?:_OR_NULL)?\s*\(").unwrap());
198
199static RE_PTR_ERR: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bPTR_ERR\s*\(").unwrap());
200
201static RE_ERR_PTR: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bERR_PTR\s*\(").unwrap());
202
203// ERR_CAST
204static RE_ERR_CAST: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bERR_CAST\s*\(").unwrap());
205
206// __must_check, __always_inline, etc.
207static RE_MUST_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__must_check\b").unwrap());
208
209static RE_ALWAYS_INLINE: LazyLock<Regex> =
210    LazyLock::new(|| Regex::new(r"\b__always_inline\b").unwrap());
211
212static RE_ALWAYS_UNUSED: LazyLock<Regex> =
213    LazyLock::new(|| Regex::new(r"\b__always_unused\b").unwrap());
214
215static RE_MAYBE_UNUSED: LazyLock<Regex> =
216    LazyLock::new(|| Regex::new(r"\b__maybe_unused\b").unwrap());
217
218static RE_NOINLINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__noinline\b").unwrap());
219
220static RE_COLD: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__cold\b").unwrap());
221
222static RE_HOT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__hot\b").unwrap());
223
224static RE_PURE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__pure\b").unwrap());
225
226static RE_INIT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__init\b").unwrap());
227
228static RE_EXIT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__exit\b").unwrap());
229
230static RE_INITDATA: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__initdata\b").unwrap());
231
232static RE_DEVINIT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__devinit\b").unwrap());
233
234static RE_DEVEXIT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__devexit\b").unwrap());
235
236static RE_USER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__user\b").unwrap());
237
238static RE_KERNEL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__kernel\b").unwrap());
239
240static RE_IOMEM: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__iomem\b").unwrap());
241
242static RE_PERCPU: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__percpu\b").unwrap());
243
244static RE_RCU: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__rcu\b").unwrap());
245
246static RE_BITWISE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__bitwise\b").unwrap());
247
248static RE_FORCE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__force\b").unwrap());
249
250static RE_NOCAST: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__nocast\b").unwrap());
251
252static RE_SAFE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__safe\b").unwrap());
253
254static RE_DEPRECATED: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__deprecated\b").unwrap());
255
256static RE_READ_MOSTLY: LazyLock<Regex> =
257    LazyLock::new(|| Regex::new(r"\b__read_mostly\b").unwrap());
258
259static RE_CACHELINE_ALIGNED: LazyLock<Regex> =
260    LazyLock::new(|| Regex::new(r"\b____cacheline_aligned(?:_in_smp)?\b").unwrap());
261
262static RE_ACQUIRES: LazyLock<Regex> =
263    LazyLock::new(|| Regex::new(r"\b__acquires\s*\([^)]*\)").unwrap());
264
265static RE_RELEASES: LazyLock<Regex> =
266    LazyLock::new(|| Regex::new(r"\b__releases\s*\([^)]*\)").unwrap());
267
268static RE_MUST_HOLD: LazyLock<Regex> =
269    LazyLock::new(|| Regex::new(r"\b__must_hold\s*\([^)]*\)").unwrap());
270
271// __free(cleanup_func) - kernel cleanup attribute
272// Used like: struct foo *p __free(kfree) = NULL;
273static RE_FREE_ATTR: LazyLock<Regex> =
274    LazyLock::new(|| Regex::new(r"\b__free\s*\([^)]*\)").unwrap());
275
276// __cleanup(func) - another cleanup attribute variant
277static RE_CLEANUP_ATTR: LazyLock<Regex> =
278    LazyLock::new(|| Regex::new(r"\b__cleanup\s*\([^)]*\)").unwrap());
279
280// _cleanup_* macros - GCC cleanup attribute pattern (used in NVMe, systemd, etc.)
281// e.g., _cleanup_free_, _cleanup_close_, _cleanup_nvme_global_ctx_
282static RE_CLEANUP_UNDERSCORE: LazyLock<Regex> =
283    LazyLock::new(|| Regex::new(r"\b_cleanup_\w+_\b").unwrap());
284
285// Windows calling conventions and modifiers
286static RE_CDECL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b_cdecl\b").unwrap());
287static RE_STDCALL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__stdcall\b").unwrap());
288static RE_PASCAL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bPASCAL\b").unwrap());
289static RE_FAR: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bFAR\b").unwrap());
290static RE_NEAR: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bNEAR\b").unwrap());
291static RE_WINAPI: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bWINAPI\b").unwrap());
292static RE_CALLBACK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\bCALLBACK\b").unwrap());
293
294// Iterator macros - these generate complex loop constructs
295// list_for_each_entry(pos, head, member) -> for (pos = ...; pos; pos = ...)
296static RE_LIST_FOR_EACH: LazyLock<Regex> = LazyLock::new(|| {
297    Regex::new(r"\blist_for_each_entry(?:_safe|_reverse|_continue|_rcu|_from)?(?:_safe)?\s*\(")
298        .unwrap()
299});
300
301// hlist variants
302static RE_HLIST_FOR_EACH: LazyLock<Regex> = LazyLock::new(|| {
303    Regex::new(r"\bhlist_for_each_entry(?:_safe|_rcu|_continue)?(?:_safe)?\s*\(").unwrap()
304});
305
306// for_each_* macros (very common in kernel and userspace libraries)
307// Matches: for_each_*, ice_for_each_*, nvme_for_each_*, *_for_each_*
308static RE_FOR_EACH: LazyLock<Regex> =
309    LazyLock::new(|| Regex::new(r"\b\w*_?for_each_\w+\s*\(").unwrap());
310
311// for_each_set_bit and similar
312static RE_FOR_EACH_BIT: LazyLock<Regex> =
313    LazyLock::new(|| Regex::new(r"\bfor_each_(?:set_bit|clear_bit)\s*\(").unwrap());
314
315// radix_tree_for_each_* macros
316static RE_RADIX_TREE_FOR_EACH: LazyLock<Regex> =
317    LazyLock::new(|| Regex::new(r"\bradix_tree_for_each_\w+\s*\(").unwrap());
318
319// xa_for_each_* (xarray) macros
320static RE_XA_FOR_EACH: LazyLock<Regex> =
321    LazyLock::new(|| Regex::new(r"\bxa_for_each(?:_start|_range|_marked)?\s*\(").unwrap());
322
323// container_of(ptr, type, member) -> ((type *)((char *)(ptr) - offsetof(type, member)))
324static RE_CONTAINER_OF: LazyLock<Regex> =
325    LazyLock::new(|| Regex::new(r"\bcontainer_of\s*\(").unwrap());
326
327// ALL_CAPS macros used as struct initializers (ICE_VSI_STAT, ICE_PF_STAT, etc.)
328// Pattern: CAPS_CAPS or CAPS_CAPS_CAPS followed by (
329// These are compound literal macros that expand to { .field = value, ... }
330static RE_CAPS_MACRO: LazyLock<Regex> =
331    LazyLock::new(|| Regex::new(r"\b[A-Z][A-Z0-9]*(?:_[A-Z][A-Z0-9]*)+\s*\(").unwrap());
332
333// EXPORT_SYMBOL and variants
334static RE_EXPORT_SYMBOL: LazyLock<Regex> =
335    LazyLock::new(|| Regex::new(r"^\s*EXPORT_SYMBOL(?:_GPL)?(?:_NS)?\s*\([^)]+\)\s*;").unwrap());
336
337// MODULE_* macros
338static RE_MODULE_MACRO: LazyLock<Regex> = LazyLock::new(|| {
339    Regex::new(
340        r"^\s*MODULE_(?:LICENSE|AUTHOR|DESCRIPTION|VERSION|ALIAS|DEVICE_TABLE)\s*\([^)]*\)\s*;",
341    )
342    .unwrap()
343});
344
345impl MacroNeutralizer {
346    pub fn new() -> Self {
347        Self {
348            stats: MacroStats::default(),
349        }
350    }
351
352    /// Get current statistics
353    pub fn stats(&self) -> &MacroStats {
354        &self.stats
355    }
356
357    /// Process source code and neutralize problematic macros
358    pub fn neutralize(&mut self, source: &str) -> String {
359        let mut result = source.to_string();
360
361        // Phase 1: Strip annotation macros (these just annotate, no semantic meaning)
362        result = self.strip_annotation_macros(&result);
363
364        // Phase 2: Expand likely/unlikely (just strip the wrapper)
365        result = self.expand_likely_unlikely(&result);
366
367        // Phase 3: Handle BUILD_BUG_ON family
368        result = self.handle_build_bug_on(&result);
369
370        // Phase 4: Handle WARN_ON/BUG_ON
371        result = self.handle_warn_bug_on(&result);
372
373        // Phase 5: Expand DEFINE_* macros to declarations
374        result = self.expand_define_macros(&result);
375
376        // Phase 6: Simplify RCU macros
377        result = self.simplify_rcu(&result);
378
379        // Phase 7: Handle memory ordering macros
380        result = self.simplify_memory_ordering(&result);
381
382        // Phase 8: Handle typeof
383        result = self.handle_typeof(&result);
384
385        // Phase 9: Handle error pointer macros
386        result = self.handle_error_pointers(&result);
387
388        // Phase 10: Simplify statement expressions (limited - only simple cases)
389        result = self.simplify_statement_expressions(&result);
390
391        // Phase 11: Handle misc macros (ARRAY_SIZE, etc)
392        result = self.handle_misc_macros(&result);
393
394        // Phase 12: Handle iterator macros (for_each_*, list_for_each_*)
395        result = self.handle_iterator_macros(&result);
396
397        // Phase 13: Handle container_of macro
398        result = self.handle_container_of(&result);
399
400        // Phase 14: Handle module/export macros
401        result = self.handle_module_macros(&result);
402
403        // Phase 15: Handle remaining CAPS_CAPS macros (compound literals for struct init)
404        result = self.handle_caps_macros(&result);
405
406        result
407    }
408
409    fn strip_annotation_macros(&self, source: &str) -> String {
410        let mut result = source.to_string();
411
412        // Strip all the annotation keywords
413        let annotations = [
414            (&*RE_MUST_CHECK, ""),
415            (&*RE_ALWAYS_INLINE, "inline"),
416            (&*RE_ALWAYS_UNUSED, ""),
417            (&*RE_MAYBE_UNUSED, ""),
418            (&*RE_NOINLINE, ""),
419            (&*RE_COLD, ""),
420            (&*RE_HOT, ""),
421            (&*RE_PURE, ""),
422            (&*RE_INIT, ""),
423            (&*RE_EXIT, ""),
424            (&*RE_INITDATA, ""),
425            (&*RE_DEVINIT, ""),
426            (&*RE_DEVEXIT, ""),
427            (&*RE_USER, ""),
428            (&*RE_KERNEL, ""),
429            (&*RE_IOMEM, ""),
430            (&*RE_PERCPU, ""),
431            (&*RE_RCU, ""),
432            (&*RE_BITWISE, ""),
433            (&*RE_FORCE, ""),
434            (&*RE_NOCAST, ""),
435            (&*RE_SAFE, ""),
436            (&*RE_DEPRECATED, ""),
437            (&*RE_READ_MOSTLY, ""),
438            (&*RE_CACHELINE_ALIGNED, ""),
439            (&*RE_ACQUIRES, ""),
440            (&*RE_RELEASES, ""),
441            (&*RE_MUST_HOLD, ""),
442            (&*RE_FREE_ATTR, ""),
443            (&*RE_CLEANUP_ATTR, ""),
444            (&*RE_CLEANUP_UNDERSCORE, ""),
445            // Windows calling conventions
446            (&*RE_CDECL, ""),
447            (&*RE_STDCALL, ""),
448            (&*RE_PASCAL, ""),
449            (&*RE_FAR, ""),
450            (&*RE_NEAR, ""),
451            (&*RE_WINAPI, ""),
452            (&*RE_CALLBACK, ""),
453        ];
454
455        for (re, replacement) in annotations {
456            result = re.replace_all(&result, replacement).to_string();
457        }
458
459        result
460    }
461
462    fn expand_likely_unlikely(&mut self, source: &str) -> String {
463        // likely(x) -> (x), unlikely(x) -> (x)
464        let mut result = source.to_string();
465
466        // Count occurrences for stats
467        let count = RE_LIKELY.find_iter(&result).count();
468        self.stats.likely_unlikely_stripped += count;
469
470        // Replace likely( and unlikely( with just (
471        result = RE_LIKELY.replace_all(&result, "(").to_string();
472
473        result
474    }
475
476    fn handle_build_bug_on(&mut self, source: &str) -> String {
477        // Replace BUILD_BUG_ON(...) with ((void)0) using balanced paren matching
478        let result = self.replace_macro_with_void(source, &RE_BUILD_BUG_ON);
479        let count1 = RE_BUILD_BUG_ON.find_iter(source).count();
480
481        let result = self.replace_macro_with_void(&result, &RE_BUILD_BUG_ON_MSG);
482        let count2 = RE_BUILD_BUG_ON_MSG.find_iter(source).count();
483
484        self.stats.build_bug_on_stripped += count1 + count2;
485
486        result
487    }
488
489    /// Replace a macro call with ((void)0), using balanced parenthesis matching
490    fn replace_macro_with_void(&self, source: &str, pattern: &Regex) -> String {
491        let mut result = String::new();
492        let mut last_end = 0;
493
494        for m in pattern.find_iter(source) {
495            result.push_str(&source[last_end..m.start()]);
496
497            // Find the matching closing paren
498            let remaining = &source[m.end()..];
499            if let Some(paren_end) = self.find_matching_paren(remaining) {
500                result.push_str("((void)0)");
501                last_end = m.end() + paren_end + 1; // +1 for the closing paren
502            } else {
503                // Couldn't find matching paren, keep original
504                result.push_str(m.as_str());
505                last_end = m.end();
506            }
507        }
508
509        result.push_str(&source[last_end..]);
510        result
511    }
512
513    fn handle_warn_bug_on(&mut self, source: &str) -> String {
514        let mut result = source.to_string();
515
516        let warn_count = RE_WARN_ON.find_iter(&result).count();
517        let bug_count = RE_BUG_ON.find_iter(&result).count();
518        self.stats.warn_on_stripped += warn_count + bug_count;
519
520        // WARN_ON(x) -> (x) - keep the condition but remove the macro
521        // We need to be careful with nested parentheses
522        result = self.replace_macro_with_arg(&result, &RE_WARN_ON);
523        result = self.replace_macro_with_arg(&result, &RE_BUG_ON);
524
525        result
526    }
527
528    fn expand_define_macros(&mut self, source: &str) -> String {
529        let mut result = source.to_string();
530        let mut count = 0;
531
532        // DEFINE_MUTEX(name) -> struct mutex name = { 0 }
533        count += RE_DEFINE_MUTEX.find_iter(&result).count();
534        result = RE_DEFINE_MUTEX
535            .replace_all(&result, "struct mutex $1 = { 0 }")
536            .to_string();
537
538        // DEFINE_SPINLOCK(name) -> spinlock_t name = { 0 }
539        count += RE_DEFINE_SPINLOCK.find_iter(&result).count();
540        result = RE_DEFINE_SPINLOCK
541            .replace_all(&result, "spinlock_t $1 = { 0 }")
542            .to_string();
543
544        // DEFINE_RWLOCK(name) -> rwlock_t name = { 0 }
545        count += RE_DEFINE_RWLOCK.find_iter(&result).count();
546        result = RE_DEFINE_RWLOCK
547            .replace_all(&result, "rwlock_t $1 = { 0 }")
548            .to_string();
549
550        // DEFINE_SEMAPHORE(name) -> struct semaphore name = { 0 }
551        count += RE_DEFINE_SEMAPHORE.find_iter(&result).count();
552        result = RE_DEFINE_SEMAPHORE
553            .replace_all(&result, "struct semaphore $1 = { 0 }")
554            .to_string();
555
556        // DEFINE_IDA(name) -> struct ida name = { 0 }
557        count += RE_DEFINE_IDA.find_iter(&result).count();
558        result = RE_DEFINE_IDA
559            .replace_all(&result, "struct ida $1 = { 0 }")
560            .to_string();
561
562        // DEFINE_IDR(name) -> struct idr name = { 0 }
563        count += RE_DEFINE_IDR.find_iter(&result).count();
564        result = RE_DEFINE_IDR
565            .replace_all(&result, "struct idr $1 = { 0 }")
566            .to_string();
567
568        // DECLARE_BITMAP(name, bits) -> unsigned long name[1]
569        count += RE_DECLARE_BITMAP.find_iter(&result).count();
570        result = RE_DECLARE_BITMAP
571            .replace_all(&result, "unsigned long $1[1]")
572            .to_string();
573
574        // DECLARE_WAIT_QUEUE_HEAD(name) -> wait_queue_head_t name = { 0 }
575        count += RE_DECLARE_WAIT_QUEUE.find_iter(&result).count();
576        result = RE_DECLARE_WAIT_QUEUE
577            .replace_all(&result, "wait_queue_head_t $1 = { 0 }")
578            .to_string();
579
580        // LIST_HEAD(name) -> struct list_head name = { &name, &name }
581        count += RE_LIST_HEAD.find_iter(&result).count();
582        result = RE_LIST_HEAD
583            .replace_all(&result, "struct list_head $1 = { 0 }")
584            .to_string();
585
586        // HLIST_HEAD(name) -> struct hlist_head name = { 0 }
587        count += RE_HLIST_HEAD.find_iter(&result).count();
588        result = RE_HLIST_HEAD
589            .replace_all(&result, "struct hlist_head $1 = { 0 }")
590            .to_string();
591
592        self.stats.define_macros_stubbed += count;
593        result
594    }
595
596    fn simplify_rcu(&mut self, source: &str) -> String {
597        let mut result = source.to_string();
598
599        // Count all RCU patterns
600        let count = RE_RCU_DEREF.find_iter(&result).count()
601            + RE_RCU_ASSIGN.find_iter(&result).count()
602            + RE_RCU_READ_LOCK.find_iter(&result).count()
603            + RE_RCU_READ_UNLOCK.find_iter(&result).count()
604            + RE_SYNCHRONIZE_RCU.find_iter(&result).count();
605        self.stats.rcu_simplified += count;
606
607        // rcu_dereference(p) -> (p)
608        result = self.replace_macro_with_arg(&result, &RE_RCU_DEREF);
609
610        // rcu_assign_pointer needs special handling - it's rcu_assign_pointer(p, v)
611        // For now, simplify to just a comment
612        // This is complex because it has two args
613
614        // rcu_read_lock() / rcu_read_unlock() -> ((void)0)
615        // These are barrier operations, safe to stub out for parsing
616        result = RE_RCU_READ_LOCK
617            .replace_all(&result, "((void)0)")
618            .to_string();
619        result = RE_RCU_READ_UNLOCK
620            .replace_all(&result, "((void)0)")
621            .to_string();
622        result = RE_SYNCHRONIZE_RCU
623            .replace_all(&result, "((void)0)")
624            .to_string();
625
626        result
627    }
628
629    fn simplify_memory_ordering(&mut self, source: &str) -> String {
630        let mut result = source.to_string();
631
632        // READ_ONCE(x) -> (*(volatile typeof(x) *)&(x)) - simplified to (x)
633        result = self.replace_macro_with_arg(&result, &RE_READ_ONCE);
634
635        // WRITE_ONCE(x, v) is harder - two args
636        // ACCESS_ONCE(x) -> (x)
637        result = self.replace_macro_with_arg(&result, &RE_ACCESS_ONCE);
638
639        // smp_load_acquire(x) -> (x)
640        result = self.replace_macro_with_arg(&result, &RE_SMP_LOAD);
641
642        // smp_store_release(x, v) - two args, skip for now
643
644        result
645    }
646
647    fn handle_typeof(&mut self, source: &str) -> String {
648        let mut result = String::new();
649        let mut last_end = 0;
650        let mut count = 0;
651
652        // typeof(x) -> __auto_type or just remove in cast contexts
653        // For casts like (typeof(x))y, we can simplify to just the variable type
654        for m in RE_TYPEOF.find_iter(source) {
655            result.push_str(&source[last_end..m.start()]);
656
657            let remaining = &source[m.end()..];
658            if let Some(paren_end) = self.find_matching_paren(remaining) {
659                let arg = &remaining[..paren_end];
660                count += 1;
661
662                // Check if this is a cast context: (typeof(x))
663                // Look back to see if we're inside parens
664                let before = &source[..m.start()];
665                let trimmed_before = before.trim_end();
666
667                if trimmed_before.ends_with('(') {
668                    // This is likely a cast: (typeof(x))
669                    // Check what comes after the closing paren
670                    let after_paren = &source[m.end() + paren_end + 1..];
671                    if after_paren.trim_start().starts_with(')') {
672                        // Replace (typeof(x)) with (__typeof_cast__)
673                        // which is valid C syntax (though meaningless)
674                        result.push_str("void *");
675                    } else {
676                        // Keep typeof but mark it
677                        result.push_str("__auto_type /* typeof(");
678                        result.push_str(arg);
679                        result.push_str(") */");
680                    }
681                } else {
682                    // Variable declaration: typeof(x) var = ...
683                    // Replace with __auto_type which is a GCC extension but parseable
684                    // Or use void* as fallback
685                    result.push_str("__auto_type /* typeof(");
686                    result.push_str(arg);
687                    result.push_str(") */");
688                }
689                last_end = m.end() + paren_end + 1;
690            } else {
691                result.push_str(m.as_str());
692                last_end = m.end();
693            }
694        }
695
696        result.push_str(&source[last_end..]);
697        self.stats.typeof_replaced += count;
698
699        result
700    }
701
702    fn handle_error_pointers(&self, source: &str) -> String {
703        let mut result = source.to_string();
704
705        // IS_ERR(x) -> ((unsigned long)(x) >= (unsigned long)-4095)
706        // Simplify to just a function call style
707        result = self.replace_macro_with_arg(&result, &RE_IS_ERR);
708
709        // PTR_ERR(x) -> ((long)(x))
710        result = self.replace_macro_with_arg(&result, &RE_PTR_ERR);
711
712        // ERR_PTR(x) -> ((void *)(long)(x))
713        result = self.replace_macro_with_arg(&result, &RE_ERR_PTR);
714
715        // ERR_CAST(x) -> ((void *)(x))
716        result = self.replace_macro_with_arg(&result, &RE_ERR_CAST);
717
718        result
719    }
720
721    fn simplify_statement_expressions(&mut self, source: &str) -> String {
722        let mut result = source.to_string();
723        let mut count = 0;
724
725        // Only handle simple statement expressions without nested braces
726        // ({ simple_expr; }) -> (simple_expr)
727
728        // This is a conservative approach - only handle the simplest cases
729        loop {
730            let prev = result.clone();
731            result = RE_STMT_EXPR
732                .replace(&result, |caps: &regex::Captures| {
733                    let matched = caps.get(0).unwrap().as_str();
734                    // Extract inner content: ({ ... }) -> ...
735                    let inner = &matched[2..matched.len() - 2]; // Remove ({ and })
736
737                    // If it's a simple expression ending in semicolon, extract it
738                    let trimmed = inner.trim();
739                    if let Some(expr) = trimmed.strip_suffix(';') {
740                        count += 1;
741                        format!("({})", expr.trim())
742                    } else if !trimmed.contains(';') {
743                        // No semicolon at all - might be just an expression
744                        count += 1;
745                        format!("({trimmed})")
746                    } else {
747                        // Multiple statements - too complex, leave as is
748                        matched.to_string()
749                    }
750                })
751                .to_string();
752
753            if result == prev {
754                break;
755            }
756        }
757
758        self.stats.statement_expressions_simplified += count;
759        result
760    }
761
762    fn handle_misc_macros(&mut self, source: &str) -> String {
763        let mut result = source.to_string();
764
765        // ARRAY_SIZE(arr) -> (sizeof(arr)/sizeof((arr)[0]))
766        result = RE_ARRAY_SIZE
767            .replace_all(&result, "(sizeof($1)/sizeof(($1)[0]))")
768            .to_string();
769
770        // BIT(n) -> (1UL << (n))
771        result = self.expand_bit_macro(&result, &RE_BIT, "1UL");
772
773        // BIT_ULL(n) -> (1ULL << (n))
774        result = self.expand_bit_macro(&result, &RE_BIT_ULL, "1ULL");
775
776        // GENMASK(h, l) and GENMASK_ULL(h, l) -> ((~0UL) & mask_calc)
777        // Simplified: just extract as function call style
778        result = self.simplify_two_arg_macro(&result, &RE_GENMASK, "0xFFFFFFFFUL");
779        result = self.simplify_two_arg_macro(&result, &RE_GENMASK_ULL, "0xFFFFFFFFFFFFFFFFULL");
780
781        // FIELD_PREP(mask, val) -> (val) - simplified
782        result = self.extract_second_arg(&result, &RE_FIELD_PREP);
783
784        // FIELD_GET(mask, val) -> (val) - simplified
785        result = self.extract_second_arg(&result, &RE_FIELD_GET);
786
787        // sizeof_field(type, member) -> sizeof(int) - simplified placeholder
788        result = self.replace_macro_with_value(&result, &RE_SIZEOF_FIELD, "sizeof(int)");
789
790        // IS_ENABLED(CONFIG_...) -> (0) - assume disabled for parsing
791        result = self.replace_macro_with_value(&result, &RE_IS_ENABLED, "(0)");
792
793        // Token concatenation - replace ## with _ to create valid identifiers
794        result = RE_TOKEN_CONCAT
795            .replace_all(&result, "${1}_${2}")
796            .to_string();
797
798        // min_t(type, a, b) -> ((a) < (b) ? (a) : (b))
799        // This is complex due to three args, skip for now
800
801        // clamp(val, lo, hi) - three args, skip for now
802
803        // offsetof is usually handled by the compiler, leave it
804
805        result
806    }
807
808    /// Expand BIT(n) or BIT_ULL(n) to (base << (n))
809    fn expand_bit_macro(&self, source: &str, pattern: &Regex, base: &str) -> String {
810        let mut result = String::new();
811        let mut last_end = 0;
812
813        for m in pattern.find_iter(source) {
814            result.push_str(&source[last_end..m.start()]);
815
816            let remaining = &source[m.end()..];
817            if let Some(paren_end) = self.find_matching_paren(remaining) {
818                let arg = &remaining[..paren_end];
819                result.push_str(&format!("({base} << ({arg}))"));
820                last_end = m.end() + paren_end + 1;
821            } else {
822                result.push_str(m.as_str());
823                last_end = m.end();
824            }
825        }
826
827        result.push_str(&source[last_end..]);
828        result
829    }
830
831    /// Simplify a two-arg macro to just return a constant
832    fn simplify_two_arg_macro(&self, source: &str, pattern: &Regex, value: &str) -> String {
833        let mut result = String::new();
834        let mut last_end = 0;
835
836        for m in pattern.find_iter(source) {
837            result.push_str(&source[last_end..m.start()]);
838
839            let remaining = &source[m.end()..];
840            if let Some(paren_end) = self.find_matching_paren(remaining) {
841                result.push_str(&format!("({value})"));
842                last_end = m.end() + paren_end + 1;
843            } else {
844                result.push_str(m.as_str());
845                last_end = m.end();
846            }
847        }
848
849        result.push_str(&source[last_end..]);
850        result
851    }
852
853    /// Extract the second argument from a two-arg macro
854    fn extract_second_arg(&self, source: &str, pattern: &Regex) -> String {
855        let mut result = String::new();
856        let mut last_end = 0;
857
858        for m in pattern.find_iter(source) {
859            result.push_str(&source[last_end..m.start()]);
860
861            let remaining = &source[m.end()..];
862            if let Some(paren_end) = self.find_matching_paren(remaining) {
863                let args = &remaining[..paren_end];
864                // Split on comma, taking care of nested parens
865                if let Some(second_arg) = self.split_macro_args(args).get(1) {
866                    result.push_str(&format!("({second_arg})"));
867                } else {
868                    result.push_str(&format!("({args})"));
869                }
870                last_end = m.end() + paren_end + 1;
871            } else {
872                result.push_str(m.as_str());
873                last_end = m.end();
874            }
875        }
876
877        result.push_str(&source[last_end..]);
878        result
879    }
880
881    /// Replace a macro call with a fixed value
882    fn replace_macro_with_value(&self, source: &str, pattern: &Regex, value: &str) -> String {
883        let mut result = String::new();
884        let mut last_end = 0;
885
886        for m in pattern.find_iter(source) {
887            result.push_str(&source[last_end..m.start()]);
888
889            let remaining = &source[m.end()..];
890            if let Some(paren_end) = self.find_matching_paren(remaining) {
891                result.push_str(value);
892                last_end = m.end() + paren_end + 1;
893            } else {
894                result.push_str(m.as_str());
895                last_end = m.end();
896            }
897        }
898
899        result.push_str(&source[last_end..]);
900        result
901    }
902
903    /// Split macro arguments, respecting nested parentheses
904    fn split_macro_args(&self, args: &str) -> Vec<String> {
905        let mut result = Vec::new();
906        let mut current = String::new();
907        let mut depth = 0;
908
909        for c in args.chars() {
910            match c {
911                '(' => {
912                    depth += 1;
913                    current.push(c);
914                }
915                ')' => {
916                    depth -= 1;
917                    current.push(c);
918                }
919                ',' if depth == 0 => {
920                    result.push(current.trim().to_string());
921                    current = String::new();
922                }
923                _ => {
924                    current.push(c);
925                }
926            }
927        }
928
929        if !current.is_empty() {
930            result.push(current.trim().to_string());
931        }
932
933        result
934    }
935
936    /// Handle iterator macros by converting them to simple for loops
937    /// These macros are notoriously problematic because they expand to complex
938    /// constructs that include variable declarations in non-standard positions.
939    fn handle_iterator_macros(&mut self, source: &str) -> String {
940        let mut result = source.to_string();
941
942        // Count occurrences
943        let list_count = RE_LIST_FOR_EACH.find_iter(&result).count()
944            + RE_HLIST_FOR_EACH.find_iter(&result).count();
945        let for_each_count =
946            RE_FOR_EACH.find_iter(&result).count() + RE_FOR_EACH_BIT.find_iter(&result).count();
947
948        self.stats.list_for_each_expanded += list_count;
949        self.stats.for_each_expanded += for_each_count;
950
951        // For list_for_each_entry(pos, head, member) { body }
952        // Convert to: for (void *__iter = 0; __iter != (void*)1; __iter = (void*)1) { body }
953        // This is a stub that allows the parser to handle the construct
954
955        // The key insight is that these macros are followed by a block.
956        // We can convert them to simple for loops that the parser can understand.
957
958        // Strategy: Replace the macro invocation with a simple for-loop header
959        // list_for_each_entry(pos, head, member) -> for (;pos;)
960        result = self.convert_iterator_to_for_loop(&result, &RE_LIST_FOR_EACH);
961        result = self.convert_iterator_to_for_loop(&result, &RE_HLIST_FOR_EACH);
962        result = self.convert_iterator_to_for_loop(&result, &RE_FOR_EACH);
963        result = self.convert_iterator_to_for_loop(&result, &RE_FOR_EACH_BIT);
964        result = self.convert_iterator_to_for_loop(&result, &RE_RADIX_TREE_FOR_EACH);
965        result = self.convert_iterator_to_for_loop(&result, &RE_XA_FOR_EACH);
966
967        result
968    }
969
970    /// Convert an iterator macro to a simple for loop
971    fn convert_iterator_to_for_loop(&self, source: &str, pattern: &Regex) -> String {
972        let mut result = String::new();
973        let mut last_end = 0;
974
975        for m in pattern.find_iter(source) {
976            result.push_str(&source[last_end..m.start()]);
977
978            // Find the closing paren of the macro call
979            let remaining = &source[m.end()..];
980            if let Some(paren_end) = self.find_matching_paren(remaining) {
981                // Extract arguments
982                let args = &remaining[..paren_end];
983                let parsed_args = self.split_macro_args(args);
984
985                // Determine which argument is the iterator variable based on macro type
986                // - list_for_each_entry(pos, head, member): pos (first) is iterator
987                // - ice_for_each_vsi(pf, v): v (second) is iterator
988                // - for_each_set_bit(bit, addr, size): bit (first) is iterator
989                // Use heuristic: if macro name contains "entry" or starts with "list_/hlist_",
990                // first arg is iterator. Otherwise, use last simple identifier as iterator.
991                let macro_name = m.as_str().trim();
992                let iter_var = if macro_name.contains("entry")
993                    || macro_name.starts_with("list_")
994                    || macro_name.starts_with("hlist_")
995                {
996                    // First argument is the iterator
997                    parsed_args.first().map(|s| s.as_str()).unwrap_or("__iter")
998                } else {
999                    // For ice_for_each_* style macros, last argument is often the iterator
1000                    // But we need to pick a simple identifier, not an expression
1001                    parsed_args
1002                        .iter()
1003                        .rev()
1004                        .find(|arg| {
1005                            let trimmed = arg.trim();
1006                            // Check if it's a simple identifier (not containing operators/parens)
1007                            !trimmed.is_empty()
1008                                && !trimmed.contains('(')
1009                                && !trimmed.contains(')')
1010                                && !trimmed.contains('-')
1011                                && !trimmed.contains('+')
1012                                && !trimmed.contains('&')
1013                                && !trimmed.contains('*')
1014                                && trimmed.chars().all(|c| c.is_alphanumeric() || c == '_')
1015                        })
1016                        .map(|s| s.as_str())
1017                        .unwrap_or_else(|| {
1018                            parsed_args.first().map(|s| s.as_str()).unwrap_or("__iter")
1019                        })
1020                };
1021
1022                // Replace with a simple for loop header
1023                // for (;iter_var;) keeps the variable reference so the body parses correctly
1024                result.push_str(&format!("for (;{iter_var};)"));
1025                last_end = m.end() + paren_end + 1;
1026            } else {
1027                // Couldn't find matching paren, keep original
1028                result.push_str(m.as_str());
1029                last_end = m.end();
1030            }
1031        }
1032
1033        result.push_str(&source[last_end..]);
1034        result
1035    }
1036
1037    /// Handle container_of macro
1038    /// container_of(ptr, type, member) -> ((type*)ptr)
1039    fn handle_container_of(&mut self, source: &str) -> String {
1040        let mut result = String::new();
1041        let mut last_end = 0;
1042
1043        let count = RE_CONTAINER_OF.find_iter(source).count();
1044        self.stats.container_of_expanded += count;
1045
1046        for m in RE_CONTAINER_OF.find_iter(source) {
1047            result.push_str(&source[last_end..m.start()]);
1048
1049            // Find the closing paren
1050            let remaining = &source[m.end()..];
1051            if let Some(paren_end) = self.find_matching_paren(remaining) {
1052                let args = &remaining[..paren_end];
1053                let parts: Vec<&str> = args.splitn(3, ',').collect();
1054
1055                if parts.len() >= 2 {
1056                    let ptr = parts[0].trim();
1057                    let type_name = parts[1].trim();
1058                    // Convert to a simple cast: ((type*)ptr)
1059                    result.push_str(&format!("(({type_name}*){ptr})"));
1060                } else {
1061                    // Not enough args, keep as-is with a simple cast wrapper
1062                    result.push_str(&format!("((void*){args})"));
1063                }
1064                last_end = m.end() + paren_end + 1;
1065            } else {
1066                result.push_str(m.as_str());
1067                last_end = m.end();
1068            }
1069        }
1070
1071        result.push_str(&source[last_end..]);
1072        result
1073    }
1074
1075    /// Handle MODULE_* and EXPORT_SYMBOL macros
1076    /// These are typically at file scope and cause parsing issues
1077    fn handle_module_macros(&self, source: &str) -> String {
1078        let mut result = String::new();
1079
1080        // Process line by line to handle line-anchored patterns
1081        for line in source.lines() {
1082            // Check if this line is an EXPORT_SYMBOL or MODULE_* macro
1083            if RE_EXPORT_SYMBOL.is_match(line) {
1084                // Replace with empty or comment
1085                result.push_str("/* ");
1086                result.push_str(line.trim());
1087                result.push_str(" */\n");
1088            } else if RE_MODULE_MACRO.is_match(line) {
1089                // Replace with a comment
1090                result.push_str("/* ");
1091                result.push_str(line.trim());
1092                result.push_str(" */\n");
1093            } else {
1094                result.push_str(line);
1095                result.push('\n');
1096            }
1097        }
1098
1099        // Remove trailing newline if source didn't have one
1100        if !source.ends_with('\n') && result.ends_with('\n') {
1101            result.pop();
1102        }
1103
1104        result
1105    }
1106
1107    /// Handle CAPS_CAPS macros that are used as struct initializers in arrays
1108    ///
1109    /// This uses context-based detection rather than hardcoded macro names:
1110    /// 1. The macro must be ALL_CAPS with underscores (e.g., FOO_BAR, MY_MACRO)
1111    /// 2. The macro must appear in array initializer context:
1112    ///    - After `{` or `,` (start of an initializer element)
1113    ///    - Followed by `,` or `}` (end of an initializer element)
1114    ///
1115    /// This approach works for any codebase, not just specific drivers.
1116    /// Common patterns this catches:
1117    /// - Linux kernel: ICE_VSI_STAT(), DEFINE_PROP_*(), PCI_DEVICE()
1118    /// - General: MY_ENTRY(), TABLE_ROW(), CONFIG_ITEM()
1119    fn handle_caps_macros(&self, source: &str) -> String {
1120        // Macros we know are NOT struct initializers (already handled or expression-like)
1121        let skip_macros = [
1122            // Already handled by other phases
1123            "ARRAY_SIZE",
1124            "BIT",
1125            "BIT_ULL",
1126            "GENMASK",
1127            "GENMASK_ULL",
1128            "FIELD_PREP",
1129            "FIELD_GET",
1130            "IS_ENABLED",
1131            "BUILD_BUG_ON",
1132            "BUILD_BUG_ON_MSG",
1133            "WARN_ON",
1134            "WARN_ON_ONCE",
1135            "BUG_ON",
1136            "READ_ONCE",
1137            "WRITE_ONCE",
1138            "ACCESS_ONCE",
1139            "IS_ERR",
1140            "IS_ERR_OR_NULL",
1141            "PTR_ERR",
1142            "ERR_PTR",
1143            "ERR_CAST",
1144            // Common expression macros that return values, not initializers
1145            "ALIGN",
1146            "DIV_ROUND_UP",
1147            "BITS_TO_LONGS",
1148            "BITS_PER_BYTE",
1149            "BITS_PER_LONG",
1150            "PAGE_SIZE",
1151            "PAGE_SHIFT",
1152            "SZ_1K",
1153            "SZ_4K",
1154            "SZ_1M",
1155            "HZ",
1156            "NSEC_PER_SEC",
1157            "USEC_PER_SEC",
1158            "MSEC_PER_SEC",
1159            // Version/compatibility macros
1160            "KERNEL_VERSION",
1161            "RHEL_RELEASE_VERSION",
1162            "SLE_VERSION",
1163            "UTS_UBUNTU_RELEASE_ABI",
1164            // Type conversion macros
1165            "U8_MAX",
1166            "U16_MAX",
1167            "U32_MAX",
1168            "U64_MAX",
1169            "S8_MAX",
1170            "S16_MAX",
1171            "S32_MAX",
1172            "S64_MAX",
1173            // Runtime PM macros (special syntax)
1174            "SET_RUNTIME_PM_OPS",
1175            "SET_SYSTEM_SLEEP_PM_OPS",
1176        ];
1177
1178        let mut result = String::new();
1179        let mut last_end = 0;
1180
1181        for m in RE_CAPS_MACRO.find_iter(source) {
1182            // Skip if this match starts before where we've already processed
1183            if m.start() < last_end {
1184                continue;
1185            }
1186
1187            let macro_name = m.as_str().trim_end_matches(['(', ' ']);
1188
1189            // Skip known non-initializer macros
1190            if skip_macros.contains(&macro_name) {
1191                continue;
1192            }
1193
1194            // Check context - must be in array initializer position
1195            // This is tricky because function calls also have commas
1196            // Key insight: array initializers have `= {` before the first element
1197            // while function calls have `(` before arguments
1198            let before = &source[..m.start()];
1199            let trimmed_before = before.trim_end();
1200
1201            // Must be after { to be the first element, or after , for subsequent
1202            let after_brace = trimmed_before.ends_with('{');
1203            let after_comma = trimmed_before.ends_with(',');
1204
1205            if !after_brace && !after_comma {
1206                continue;
1207            }
1208
1209            // If after comma, verify we're in an initializer by checking for `= {`
1210            // This prevents matching function call arguments like foo(x, MACRO(y))
1211            if after_comma {
1212                // Look backwards for opening brace, but stop at semicolon or closing brace
1213                // which would indicate we're not in an initializer
1214                let mut brace_depth = 0;
1215                let mut found_init_brace = false;
1216                for c in trimmed_before.chars().rev() {
1217                    match c {
1218                        '}' => brace_depth += 1,
1219                        '{' => {
1220                            if brace_depth == 0 {
1221                                found_init_brace = true;
1222                                break;
1223                            }
1224                            brace_depth -= 1;
1225                        }
1226                        ';' => break, // Statement boundary
1227                        '(' => break, // Function call, not initializer
1228                        _ => {}
1229                    }
1230                }
1231                if !found_init_brace {
1232                    continue;
1233                }
1234            }
1235
1236            // Find the closing paren to check what follows
1237            let remaining = &source[m.end()..];
1238            if let Some(paren_end) = self.find_matching_paren(remaining) {
1239                let after_paren = remaining[paren_end + 1..].trim_start();
1240
1241                // Must be followed by , or } to be an array element
1242                let before_comma_or_brace =
1243                    after_paren.starts_with(',') || after_paren.starts_with('}');
1244
1245                if !before_comma_or_brace {
1246                    continue;
1247                }
1248
1249                // This is definitely a struct initializer in an array
1250                result.push_str(&source[last_end..m.start()]);
1251                result.push_str("{ 0 }");
1252                last_end = m.end() + paren_end + 1;
1253            }
1254        }
1255
1256        result.push_str(&source[last_end..]);
1257        result
1258    }
1259
1260    /// Helper to replace a macro call with its argument
1261    /// e.g., likely(x) -> (x)
1262    fn replace_macro_with_arg(&self, source: &str, pattern: &Regex) -> String {
1263        let mut result = String::new();
1264        let mut last_end = 0;
1265
1266        for m in pattern.find_iter(source) {
1267            result.push_str(&source[last_end..m.start()]);
1268
1269            // Find the matching closing paren
1270            let remaining = &source[m.end()..];
1271            if let Some(arg_end) = self.find_matching_paren(remaining) {
1272                let arg = &remaining[..arg_end];
1273                result.push('(');
1274                result.push_str(arg);
1275                result.push(')');
1276                last_end = m.end() + arg_end + 1; // +1 for the closing paren
1277            } else {
1278                // Couldn't find matching paren, keep original
1279                result.push_str(m.as_str());
1280                last_end = m.end();
1281            }
1282        }
1283
1284        result.push_str(&source[last_end..]);
1285        result
1286    }
1287
1288    /// Find the position of the matching closing parenthesis
1289    fn find_matching_paren(&self, s: &str) -> Option<usize> {
1290        let mut depth = 1;
1291        for (i, c) in s.char_indices() {
1292            match c {
1293                '(' => depth += 1,
1294                ')' => {
1295                    depth -= 1;
1296                    if depth == 0 {
1297                        return Some(i);
1298                    }
1299                }
1300                _ => {}
1301            }
1302        }
1303        None
1304    }
1305}
1306
1307impl Default for MacroNeutralizer {
1308    fn default() -> Self {
1309        Self::new()
1310    }
1311}
1312
#[cfg(test)]
mod tests {
    use super::*;

    /// Both branch hints are stripped down to their parenthesized condition
    /// and each occurrence is counted.
    #[test]
    fn test_likely_unlikely() {
        let mut macros = MacroNeutralizer::new();

        let out = macros.neutralize("if (likely(x > 0)) { } if (unlikely(y < 0)) { }");

        assert!(out.contains("if ((x > 0))"));
        assert!(out.contains("if ((y < 0))"));
        assert_eq!(macros.stats.likely_unlikely_stripped, 2);
    }

    /// A compile-time assertion is replaced by a void no-op and counted.
    #[test]
    fn test_build_bug_on() {
        let mut macros = MacroNeutralizer::new();

        let out = macros.neutralize("BUILD_BUG_ON(sizeof(x) != 4);");

        assert!(out.contains("((void)0)"));
        assert_eq!(macros.stats.build_bug_on_stripped, 1);
    }

    /// `DEFINE_MUTEX(name)` turns into a plain struct declaration and is counted.
    #[test]
    fn test_define_mutex() {
        let mut macros = MacroNeutralizer::new();

        let out = macros.neutralize("DEFINE_MUTEX(my_lock);");

        assert!(out.contains("struct mutex my_lock"));
        assert_eq!(macros.stats.define_macros_stubbed, 1);
    }

    /// Kernel annotations must not survive neutralization.
    #[test]
    fn test_annotation_stripping() {
        let mut macros = MacroNeutralizer::new();

        let out = macros.neutralize("static __init __cold int my_func(void __user *ptr)");

        assert!(!out.contains("__init"));
        assert!(!out.contains("__cold"));
        assert!(!out.contains("__user"));
    }

    /// A one-expression statement expression is either simplified or left
    /// untouched; both outcomes are accepted.
    #[test]
    fn test_simple_statement_expression() {
        let mut macros = MacroNeutralizer::new();

        let out = macros.neutralize("int x = ({ 42; });");

        assert!(out.contains("(42)") || out.contains("({ 42; })"));
    }

    /// `WARN_ON(cond)` is reduced to the parenthesized condition.
    #[test]
    fn test_warn_on() {
        let mut macros = MacroNeutralizer::new();

        let out = macros.neutralize("WARN_ON(ptr == NULL);");

        assert!(out.contains("(ptr == NULL)"));
    }

    /// `ARRAY_SIZE(arr)` expands to a `sizeof` expression over `arr`.
    #[test]
    fn test_array_size() {
        let mut macros = MacroNeutralizer::new();

        let out = macros.neutralize("int count = ARRAY_SIZE(my_array);");

        assert!(out.contains("sizeof(my_array)"));
    }
}