1use std::collections::HashMap;
14
/// Category assigned to an identifier by [`CPreprocessor::classify_macro`].
///
/// The enum is fieldless, so it is cheap to copy and usable as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MacroKind {
    /// The name is present in the preprocessor's type/constant expansion table.
    TypeAlias,
    /// The name is present in the attribute/annotation table.
    Attribute,
    /// The name is present in the function-wrapper table.
    FunctionWrapper,
    /// The name is present in the module-declaration table.
    ModuleDeclaration,
    /// The name starts with `DEFINE_` / `DECLARE_` or contains `_LOCK` / `_MUTEX`.
    LockingPrimitive,
    /// The name contains `alloc` or `free`.
    MemoryOperation,
    /// The name starts with `CONFIG_`, `IS_ENABLED`, `IS_BUILTIN` or `IS_MODULE`.
    ConditionalMarker,
    /// None of the other categories matched.
    Generic,
}
35
36#[derive(Debug, Clone)]
38pub struct MacroInfo {
39 pub name: String,
40 pub kind: MacroKind,
41 pub expansion_hint: Option<String>,
42}
43
/// Table-driven, line-oriented rewriter and classifier for macro-heavy C source.
///
/// All tables are filled by [`CPreprocessor::new`].
#[derive(Debug, Clone)]
pub struct CPreprocessor {
    /// Identifier -> expansion text. Holds type aliases as well as the
    /// constants registered by `init_standard_macros` (`NULL`, `true`, ...).
    type_macros: HashMap<String, String>,
    /// Names that `process_line` strips from source lines, in registration order.
    attribute_macros: Vec<String>,
    /// Wrapper macro name -> return type text recorded for it.
    function_wrappers: HashMap<String, String>,
    /// Names classified as module declarations.
    module_macros: Vec<String>,
}
55
56impl Default for CPreprocessor {
57 fn default() -> Self {
58 Self::new()
59 }
60}
61
62impl CPreprocessor {
63 pub fn new() -> Self {
64 let mut preprocessor = Self {
65 type_macros: HashMap::new(),
66 attribute_macros: Vec::new(),
67 function_wrappers: HashMap::new(),
68 module_macros: Vec::new(),
69 };
70 preprocessor.init_kernel_macros();
71 preprocessor.init_standard_macros();
72 preprocessor
73 }
74
75 fn init_kernel_macros(&mut self) {
77 for (macro_name, expansion) in [
79 ("u8", "unsigned char"),
80 ("u16", "unsigned short"),
81 ("u32", "unsigned int"),
82 ("u64", "unsigned long long"),
83 ("s8", "signed char"),
84 ("s16", "signed short"),
85 ("s32", "signed int"),
86 ("s64", "signed long long"),
87 ("__u8", "unsigned char"),
88 ("__u16", "unsigned short"),
89 ("__u32", "unsigned int"),
90 ("__u64", "unsigned long long"),
91 ("__s8", "signed char"),
92 ("__s16", "signed short"),
93 ("__s32", "signed int"),
94 ("__s64", "signed long long"),
95 ("__le16", "unsigned short"),
96 ("__le32", "unsigned int"),
97 ("__le64", "unsigned long long"),
98 ("__be16", "unsigned short"),
99 ("__be32", "unsigned int"),
100 ("__be64", "unsigned long long"),
101 ("bool", "_Bool"),
102 ("atomic_t", "int"),
103 ("atomic64_t", "long long"),
104 ("spinlock_t", "int"),
105 ("rwlock_t", "int"),
106 ("mutex", "int"),
107 ("size_t", "unsigned long"),
108 ("ssize_t", "long"),
109 ("ptrdiff_t", "long"),
110 ("uintptr_t", "unsigned long"),
111 ("intptr_t", "long"),
112 ("phys_addr_t", "unsigned long long"),
113 ("dma_addr_t", "unsigned long long"),
114 ("resource_size_t", "unsigned long long"),
115 ("gfp_t", "unsigned int"),
116 ("fmode_t", "unsigned int"),
117 ("umode_t", "unsigned short"),
118 ("dev_t", "unsigned int"),
119 ("loff_t", "long long"),
120 ("pid_t", "int"),
121 ("uid_t", "unsigned int"),
122 ("gid_t", "unsigned int"),
123 ("ktime_t", "long long"),
124 ] {
125 self.type_macros
126 .insert(macro_name.to_string(), expansion.to_string());
127 }
128
129 self.attribute_macros.extend(
131 [
132 "__init",
134 "__exit",
135 "__initdata",
136 "__exitdata",
137 "__initconst",
138 "__devinit",
139 "__devexit",
140 "__cold",
142 "__hot",
143 "__pure",
144 "__const",
145 "__noreturn",
146 "__malloc",
147 "__weak",
148 "__alias",
149 "__always_inline",
150 "__noinline",
151 "noinline",
152 "inline",
153 "__inline",
154 "__inline__",
155 "__section",
156 "__visible",
157 "__flatten",
158 "__user",
160 "__kernel",
161 "__iomem",
162 "__percpu",
163 "__rcu",
164 "__force",
165 "__bitwise",
166 "__safe",
167 "__maybe_unused",
169 "__always_unused",
170 "__unused",
171 "__packed",
173 "__aligned",
174 "__cacheline_aligned",
175 "__cacheline_aligned_in_smp",
176 "__page_aligned_data",
177 "__page_aligned_bss",
178 "__deprecated",
180 "__deprecated_for_modules",
181 "__must_check",
183 "__must_hold",
184 "__acquires",
185 "__releases",
186 "__acquire",
187 "__release",
188 "__cond_lock",
189 "__read_mostly",
191 "__ro_after_init",
192 "asmlinkage",
194 "fastcall",
195 "regparm",
196 "EXPORT_SYMBOL",
198 "EXPORT_SYMBOL_GPL",
199 "EXPORT_SYMBOL_NS",
200 "EXPORT_SYMBOL_NS_GPL",
201 "likely",
203 "unlikely",
204 "ACCESS_ONCE",
206 "READ_ONCE",
207 "WRITE_ONCE",
208 "__wsum",
210 "__sum16",
211 "__be16",
212 "__be32",
213 "__be64",
214 "__le16",
215 "__le32",
216 "__le64",
217 ]
218 .iter()
219 .map(|s| s.to_string()),
220 );
221
222 for (wrapper, ret_type) in [
224 ("SYSCALL_DEFINE0", "long"),
225 ("SYSCALL_DEFINE1", "long"),
226 ("SYSCALL_DEFINE2", "long"),
227 ("SYSCALL_DEFINE3", "long"),
228 ("SYSCALL_DEFINE4", "long"),
229 ("SYSCALL_DEFINE5", "long"),
230 ("SYSCALL_DEFINE6", "long"),
231 ("COMPAT_SYSCALL_DEFINE0", "long"),
232 ("COMPAT_SYSCALL_DEFINE1", "long"),
233 ("COMPAT_SYSCALL_DEFINE2", "long"),
234 ("COMPAT_SYSCALL_DEFINE3", "long"),
235 ("COMPAT_SYSCALL_DEFINE4", "long"),
236 ("COMPAT_SYSCALL_DEFINE5", "long"),
237 ("COMPAT_SYSCALL_DEFINE6", "long"),
238 ("__setup", "int"),
239 ("early_param", "int"),
240 ("core_param", "int"),
241 ("module_param", "void"),
242 ("module_param_named", "void"),
243 ("DEFINE_PER_CPU", "void"),
244 ("DECLARE_PER_CPU", "void"),
245 ] {
246 self.function_wrappers
247 .insert(wrapper.to_string(), ret_type.to_string());
248 }
249
250 self.module_macros.extend(
252 [
253 "MODULE_LICENSE",
254 "MODULE_AUTHOR",
255 "MODULE_DESCRIPTION",
256 "MODULE_VERSION",
257 "MODULE_ALIAS",
258 "MODULE_DEVICE_TABLE",
259 "MODULE_FIRMWARE",
260 "MODULE_INFO",
261 "MODULE_PARM_DESC",
262 "module_init",
263 "module_exit",
264 "late_initcall",
265 "subsys_initcall",
266 "fs_initcall",
267 "device_initcall",
268 "arch_initcall",
269 "core_initcall",
270 "postcore_initcall",
271 ]
272 .iter()
273 .map(|s| s.to_string()),
274 );
275 }
276
277 fn init_standard_macros(&mut self) {
279 for (macro_name, expansion) in [
281 ("NULL", "((void*)0)"),
282 ("EOF", "(-1)"),
283 ("true", "1"),
284 ("false", "0"),
285 ("TRUE", "1"),
286 ("FALSE", "0"),
287 ] {
288 self.type_macros
289 .insert(macro_name.to_string(), expansion.to_string());
290 }
291 }
292
293 pub fn is_type_macro(&self, name: &str) -> bool {
295 self.type_macros.contains_key(name)
296 }
297
298 pub fn get_type_expansion(&self, name: &str) -> Option<&str> {
300 self.type_macros.get(name).map(|s| s.as_str())
301 }
302
303 pub fn is_attribute_macro(&self, name: &str) -> bool {
305 self.attribute_macros.contains(&name.to_string())
306 }
307
308 pub fn is_function_wrapper(&self, name: &str) -> bool {
310 self.function_wrappers.contains_key(name)
311 }
312
313 pub fn is_module_macro(&self, name: &str) -> bool {
315 self.module_macros.contains(&name.to_string())
316 }
317
318 pub fn classify_macro(&self, name: &str) -> MacroKind {
320 if self.is_type_macro(name) {
321 MacroKind::TypeAlias
322 } else if self.is_attribute_macro(name) {
323 MacroKind::Attribute
324 } else if self.is_function_wrapper(name) {
325 MacroKind::FunctionWrapper
326 } else if self.is_module_macro(name) {
327 MacroKind::ModuleDeclaration
328 } else if name.starts_with("DEFINE_")
329 || name.starts_with("DECLARE_")
330 || name.contains("_LOCK")
331 || name.contains("_MUTEX")
332 {
333 MacroKind::LockingPrimitive
334 } else if name.contains("alloc")
335 || name.contains("free")
336 || name.starts_with("k")
337 && (name.contains("alloc") || name.contains("free") || name.contains("zalloc"))
338 {
339 MacroKind::MemoryOperation
340 } else if name.starts_with("CONFIG_")
341 || name.starts_with("IS_ENABLED")
342 || name.starts_with("IS_BUILTIN")
343 || name.starts_with("IS_MODULE")
344 {
345 MacroKind::ConditionalMarker
346 } else {
347 MacroKind::Generic
348 }
349 }
350
351 pub fn preprocess(&self, source: &str) -> String {
357 let mut result = String::with_capacity(source.len());
358
359 for line in source.lines() {
360 let processed = self.process_line(line);
361 result.push_str(&processed);
362 result.push('\n');
363 }
364
365 result
366 }
367
368 fn process_line(&self, line: &str) -> String {
370 let trimmed = line.trim();
371
372 if trimmed.is_empty() || trimmed.starts_with("//") {
374 return line.to_string();
375 }
376
377 if trimmed.starts_with("#include") {
379 return line.to_string();
380 }
381
382 if trimmed.starts_with("#define ")
390 || trimmed.starts_with("#undef ")
391 || trimmed.starts_with("#pragma ")
392 || trimmed.starts_with("#error ")
393 || trimmed.starts_with("#warning ")
394 {
395 return "/* preprocessor directive stripped */".to_string();
397 }
398
399 let mut result = line.to_string();
401 for attr in &self.attribute_macros {
402 let patterns = [format!("{attr} "), format!("{attr}\t"), format!("{attr}(")];
405
406 for pattern in &patterns {
407 if result.contains(pattern.as_str()) {
408 if pattern.ends_with('(') {
410 if let Some(start) = result.find(attr) {
412 if let Some(paren_start) = result[start..].find('(') {
413 let abs_paren = start + paren_start;
414 let mut depth = 1;
415 let mut end = abs_paren + 1;
416 for (i, c) in result[abs_paren + 1..].char_indices() {
417 match c {
418 '(' => depth += 1,
419 ')' => {
420 depth -= 1;
421 if depth == 0 {
422 end = abs_paren + 1 + i + 1;
423 break;
424 }
425 }
426 _ => {}
427 }
428 }
429 result = format!("{}{}", &result[..start], &result[end..]);
430 }
431 }
432 } else {
433 result = result.replace(pattern, "");
434 }
435 }
436 }
437 }
438
439 while let Some(start) = result.find("offsetof(") {
442 let rest = &result[start + 9..]; let mut depth = 1;
444 let mut end_paren = 0;
445
446 for (i, c) in rest.char_indices() {
447 match c {
448 '(' => depth += 1,
449 ')' => {
450 depth -= 1;
451 if depth == 0 {
452 end_paren = i;
453 break;
454 }
455 }
456 _ => {}
457 }
458 }
459
460 if end_paren > 0 {
461 result = format!(
462 "{}0{}",
463 &result[..start],
464 &result[start + 9 + end_paren + 1..]
465 );
466 } else {
467 break;
468 }
469 }
470
471 while let Some(start) = result.find("container_of(") {
474 let rest = &result[start + 13..]; let mut depth = 1;
476 let mut end_paren = 0;
477 let mut first_comma = None;
478
479 for (i, c) in rest.char_indices() {
480 match c {
481 '(' => depth += 1,
482 ')' => {
483 depth -= 1;
484 if depth == 0 {
485 end_paren = i;
486 break;
487 }
488 }
489 ',' if depth == 1 && first_comma.is_none() => {
490 first_comma = Some(i);
491 }
492 _ => {}
493 }
494 }
495
496 if end_paren > 0 {
497 let ptr = if let Some(comma_pos) = first_comma {
499 rest[..comma_pos].trim()
500 } else {
501 "ptr"
502 };
503 let replacement = format!("((void*){ptr})");
505 result = format!(
506 "{}{}{}",
507 &result[..start],
508 replacement,
509 &result[start + 13 + end_paren + 1..]
510 );
511 } else {
512 break;
513 }
514 }
515
516 while let Some(start) = result.find("__attribute__") {
518 if let Some(paren_start) = result[start..].find("((") {
519 let abs_start = start + paren_start;
520 let mut depth = 2; let mut end = abs_start + 2;
522 for (i, c) in result[abs_start + 2..].char_indices() {
523 match c {
524 '(' => depth += 1,
525 ')' => {
526 depth -= 1;
527 if depth == 0 {
528 end = abs_start + 2 + i + 1;
529 break;
530 }
531 }
532 _ => {}
533 }
534 }
535 result = format!("{}{}", &result[..start], &result[end..]);
536 } else {
537 break;
538 }
539 }
540
541 result
542 }
543
544 pub fn analyze_macros(&self, source: &str) -> Vec<MacroInfo> {
546 let mut macros = Vec::new();
547
548 for word in source.split(|c: char| !c.is_alphanumeric() && c != '_') {
550 if word.is_empty() {
551 continue;
552 }
553
554 let kind = self.classify_macro(word);
555 if kind != MacroKind::Generic {
556 macros.push(MacroInfo {
557 name: word.to_string(),
558 kind: kind.clone(),
559 expansion_hint: self.get_type_expansion(word).map(|s| s.to_string()),
560 });
561 }
562 }
563
564 macros.sort_by(|a, b| a.name.cmp(&b.name));
566 macros.dedup_by(|a, b| a.name == b.name);
567
568 macros
569 }
570}
571
#[cfg(test)]
mod tests {
    use super::*;

    /// Names from the built-in type table are recognised; unknown names are not.
    #[test]
    fn test_type_macro_recognition() {
        let preprocessor = CPreprocessor::new();
        for known in ["u8", "u32", "size_t"] {
            assert!(preprocessor.is_type_macro(known));
        }
        assert!(!preprocessor.is_type_macro("unknown_type"));
    }

    /// Names from the attribute table are recognised; ordinary identifiers are not.
    #[test]
    fn test_attribute_macro_recognition() {
        let preprocessor = CPreprocessor::new();
        for known in ["__init", "__exit", "__user"] {
            assert!(preprocessor.is_attribute_macro(known));
        }
        assert!(!preprocessor.is_attribute_macro("regular_function"));
    }

    /// One representative name per category maps to the expected kind.
    #[test]
    fn test_macro_classification() {
        let preprocessor = CPreprocessor::new();
        let expectations = [
            ("u32", MacroKind::TypeAlias),
            ("__init", MacroKind::Attribute),
            ("MODULE_LICENSE", MacroKind::ModuleDeclaration),
            ("DEFINE_MUTEX", MacroKind::LockingPrimitive),
            ("CONFIG_DEBUG", MacroKind::ConditionalMarker),
        ];
        for (name, expected) in expectations {
            assert_eq!(preprocessor.classify_macro(name), expected);
        }
    }

    /// A bare attribute macro is removed while the surrounding code is kept.
    #[test]
    fn test_preprocess_strips_attributes() {
        let preprocessor = CPreprocessor::new();
        let output = preprocessor.preprocess("static __init int my_init(void) { return 0; }");
        assert!(!output.contains("__init"));
        assert!(output.contains("static"));
        assert!(output.contains("int my_init"));
    }

    /// `__attribute__((...))` is removed while the surrounding code is kept.
    #[test]
    fn test_preprocess_handles_attribute_syntax() {
        let preprocessor = CPreprocessor::new();
        let output = preprocessor.preprocess("void __attribute__((packed)) my_struct;");
        assert!(!output.contains("__attribute__"));
        assert!(output.contains("void"));
    }

    /// Type and attribute macros used in a snippet are all reported.
    #[test]
    fn test_analyze_macros() {
        let preprocessor = CPreprocessor::new();
        let found =
            preprocessor.analyze_macros("u32 foo; __init static int bar(size_t n) { return 0; }");

        for expected in ["u32", "__init", "size_t"] {
            assert!(found.iter().any(|info| info.name == expected));
        }
    }

    /// `#include` lines pass through untouched.
    #[test]
    fn test_preprocess_preserves_includes() {
        let preprocessor = CPreprocessor::new();
        let output =
            preprocessor.preprocess("#include <linux/module.h>\n#include \"myheader.h\"");
        assert!(output.contains("#include <linux/module.h>"));
        assert!(output.contains("#include \"myheader.h\""));
    }
}