1use regex::Regex;
8use std::sync::LazyLock;
9
/// Category of GCC-specific construct that a [`Transformation`] rewrote.
#[derive(Debug, Clone, PartialEq)]
pub enum TransformKind {
    /// An `__attribute__((...))` specifier.
    Attribute,
    /// The `__extension__` marker.
    Extension,
    /// A `typeof(...)` operator. `GccNeutralizer::neutralize` also records the
    /// `__typeof__(...)` and `__typeof(...)` spellings under this variant.
    Typeof,
    /// A GNU statement expression, `({ ... })`.
    StatementExpression,
    /// An inline-assembly statement introduced by `__asm__` or `__asm`.
    Asm,
    /// The `__restrict` / `__restrict__` qualifier.
    Restrict,
    /// The `__inline` / `__inline__` specifier.
    Inline,
    /// The `__volatile` / `__volatile__` qualifier.
    Volatile,
    /// Not constructed by any code visible in this file.
    /// NOTE(review): presumably meant for the underscore-spelled typeof forms,
    /// which are currently recorded as [`TransformKind::Typeof`] — confirm.
    TypeofUnderscore,
    /// A compiler builtin replaced by a constant. `GccNeutralizer::neutralize`
    /// uses it for `__builtin_offsetof(...)` and
    /// `__builtin_types_compatible_p(...)`.
    AlignofSizeof,
}
34
/// Record of one rewrite performed by `GccNeutralizer::neutralize`.
///
/// All offsets and lengths are byte counts. They are measured in the working
/// copy of the source at the moment the rewrite was applied, so rewrites that
/// happened earlier have already shifted them relative to the caller's input.
#[derive(Debug, Clone)]
pub struct Transformation {
    /// Byte offset at which the rewritten span started.
    pub original_start: usize,
    /// Byte length of the span that was taken out.
    pub original_length: usize,
    /// Byte offset at which the replacement was written; `neutralize` always
    /// sets it equal to `original_start`.
    pub transformed_start: usize,
    /// Byte length recorded for the replacement text (0 when the span is
    /// removed without a substitute).
    pub transformed_length: usize,
    /// Which kind of construct was rewritten.
    pub kind: TransformKind,
    /// The exact text that was taken out.
    pub original_text: String,
}
51
/// Result of `GccNeutralizer::neutralize`: the rewritten text plus a log of
/// what was changed.
#[derive(Debug)]
pub struct NeutralizedSource {
    /// The source text after every rewrite has been applied.
    pub code: String,
    /// One entry per rewrite, in the order the rewrites were applied.
    pub transformations: Vec<Transformation>,
}
60
/// Rewrites GCC-specific C constructs into plain tokens via
/// `GccNeutralizer::neutralize`.
///
/// The struct has no fields; all pattern state lives in module-level statics.
pub struct GccNeutralizer {}
66
67static RE_ATTRIBUTE: LazyLock<Regex> =
69 LazyLock::new(|| Regex::new(r"__attribute__\s*\(\(").unwrap());
70static RE_EXTENSION: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__extension__\s*").unwrap());
71static RE_TYPEOF: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"typeof\s*\(").unwrap());
72static RE_TYPEOF_UNDERSCORE: LazyLock<Regex> =
73 LazyLock::new(|| Regex::new(r"__typeof__\s*\(").unwrap());
74static RE_TYPEOF_SINGLE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__typeof\s*\(").unwrap());
75static RE_ASM: LazyLock<Regex> =
76 LazyLock::new(|| Regex::new(r"__asm__\s*(?:volatile\s*)?\(").unwrap());
77static RE_ASM_VOLATILE: LazyLock<Regex> =
78 LazyLock::new(|| Regex::new(r"__asm\s+volatile\s*\(").unwrap());
79static RE_RESTRICT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__restrict\s+").unwrap());
80static RE_RESTRICT_UNDERSCORE: LazyLock<Regex> =
81 LazyLock::new(|| Regex::new(r"__restrict__\s+").unwrap());
82static RE_INLINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__inline__\s+").unwrap());
83static RE_INLINE_SINGLE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__inline\s+").unwrap());
84static RE_VOLATILE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__volatile__\s+").unwrap());
85static RE_VOLATILE_SINGLE: LazyLock<Regex> =
86 LazyLock::new(|| Regex::new(r"__volatile\s+").unwrap());
87static RE_STATEMENT_EXPR: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\(\{").unwrap());
88static RE_BUILTIN_OFFSETOF: LazyLock<Regex> =
89 LazyLock::new(|| Regex::new(r"__builtin_offsetof\s*\(").unwrap());
90static RE_BUILTIN_TYPES_COMPATIBLE: LazyLock<Regex> =
91 LazyLock::new(|| Regex::new(r"__builtin_types_compatible_p\s*\(").unwrap());
92
93impl Default for GccNeutralizer {
94 fn default() -> Self {
95 Self::new()
96 }
97}
98
99impl GccNeutralizer {
100 pub fn new() -> Self {
101 Self {}
102 }
103
104 pub fn neutralize(&self, source: &str) -> NeutralizedSource {
106 let mut code = source.to_string();
107 let mut transformations = Vec::new();
108
109 while let Some(m) = RE_ATTRIBUTE.find(&code) {
114 let start = m.start();
115 if let Some((end, _original)) = Self::find_double_paren_end(&code, start + m.len()) {
116 let original_text = code[start..end].to_string();
117 transformations.push(Transformation {
118 original_start: start,
119 original_length: end - start,
120 transformed_start: start,
121 transformed_length: 0,
122 kind: TransformKind::Attribute,
123 original_text,
124 });
125 code = format!("{}{}", &code[..start], &code[end..]);
126 } else {
127 break;
128 }
129 }
130
131 while let Some(m) = RE_EXTENSION.find(&code) {
133 let start = m.start();
134 let end = m.end();
135 transformations.push(Transformation {
136 original_start: start,
137 original_length: end - start,
138 transformed_start: start,
139 transformed_length: 0,
140 kind: TransformKind::Extension,
141 original_text: code[start..end].to_string(),
142 });
143 code = format!("{}{}", &code[..start], &code[end..]);
144 }
145
146 for regex in [&*RE_TYPEOF, &*RE_TYPEOF_UNDERSCORE, &*RE_TYPEOF_SINGLE] {
148 while let Some(m) = regex.find(&code) {
149 let start = m.start();
150 if let Some((end, _)) = Self::find_matching_paren(&code, m.end() - 1) {
151 let original_text = code[start..end].to_string();
152 transformations.push(Transformation {
153 original_start: start,
154 original_length: end - start,
155 transformed_start: start,
156 transformed_length: 3, kind: TransformKind::Typeof,
158 original_text,
159 });
160 code = format!("{}int{}", &code[..start], &code[end..]);
161 } else {
162 break;
163 }
164 }
165 }
166
167 for regex in [&*RE_ASM, &*RE_ASM_VOLATILE] {
169 while let Some(m) = regex.find(&code) {
170 let start = m.start();
171 if let Some((end, _)) = Self::find_matching_paren(&code, m.end() - 1) {
172 let stmt_end = code[end..].find(';').map(|i| end + i + 1).unwrap_or(end);
174 let original_text = code[start..stmt_end].to_string();
175 transformations.push(Transformation {
176 original_start: start,
177 original_length: stmt_end - start,
178 transformed_start: start,
179 transformed_length: 4, kind: TransformKind::Asm,
181 original_text,
182 });
183 code = format!("{}0{}", &code[..start], &code[stmt_end..]);
185 } else {
186 break;
187 }
188 }
189 }
190
191 for regex in [&*RE_RESTRICT, &*RE_RESTRICT_UNDERSCORE] {
193 while let Some(m) = regex.find(&code) {
194 let start = m.start();
195 let end = m.end();
196 transformations.push(Transformation {
197 original_start: start,
198 original_length: end - start,
199 transformed_start: start,
200 transformed_length: 0,
201 kind: TransformKind::Restrict,
202 original_text: code[start..end].to_string(),
203 });
204 code = format!("{}{}", &code[..start], &code[end..]);
205 }
206 }
207
208 for regex in [&*RE_INLINE, &*RE_INLINE_SINGLE] {
210 while let Some(m) = regex.find(&code) {
211 let start = m.start();
212 let end = m.end();
213 transformations.push(Transformation {
214 original_start: start,
215 original_length: end - start,
216 transformed_start: start,
217 transformed_length: 0,
218 kind: TransformKind::Inline,
219 original_text: code[start..end].to_string(),
220 });
221 code = format!("{}{}", &code[..start], &code[end..]);
222 }
223 }
224
225 for regex in [&*RE_VOLATILE, &*RE_VOLATILE_SINGLE] {
227 while let Some(m) = regex.find(&code) {
228 let start = m.start();
229 let end = m.end();
230 transformations.push(Transformation {
231 original_start: start,
232 original_length: end - start,
233 transformed_start: start,
234 transformed_length: 0,
235 kind: TransformKind::Volatile,
236 original_text: code[start..end].to_string(),
237 });
238 code = format!("{}{}", &code[..start], &code[end..]);
239 }
240 }
241
242 while let Some(m) = RE_STATEMENT_EXPR.find(&code) {
244 let start = m.start();
245 if let Some((end, _)) = Self::find_statement_expr_end(&code, start) {
246 let original_text = code[start..end].to_string();
247 transformations.push(Transformation {
248 original_start: start,
249 original_length: end - start,
250 transformed_start: start,
251 transformed_length: 3, kind: TransformKind::StatementExpression,
253 original_text,
254 });
255 code = format!("{}(0){}", &code[..start], &code[end..]);
256 } else {
257 break;
258 }
259 }
260
261 while let Some(m) = RE_BUILTIN_OFFSETOF.find(&code) {
263 let start = m.start();
264 if let Some((end, _)) = Self::find_matching_paren(&code, m.end() - 1) {
265 let original_text = code[start..end].to_string();
266 transformations.push(Transformation {
267 original_start: start,
268 original_length: end - start,
269 transformed_start: start,
270 transformed_length: 1, kind: TransformKind::AlignofSizeof,
272 original_text,
273 });
274 code = format!("{}0{}", &code[..start], &code[end..]);
275 } else {
276 break;
277 }
278 }
279
280 while let Some(m) = RE_BUILTIN_TYPES_COMPATIBLE.find(&code) {
282 let start = m.start();
283 if let Some((end, _)) = Self::find_matching_paren(&code, m.end() - 1) {
284 let original_text = code[start..end].to_string();
285 transformations.push(Transformation {
286 original_start: start,
287 original_length: end - start,
288 transformed_start: start,
289 transformed_length: 1, kind: TransformKind::AlignofSizeof,
291 original_text,
292 });
293 code = format!("{}0{}", &code[..start], &code[end..]);
294 } else {
295 break;
296 }
297 }
298
299 NeutralizedSource {
300 code,
301 transformations,
302 }
303 }
304
305 fn find_double_paren_end(code: &str, start: usize) -> Option<(usize, String)> {
307 let bytes = code.as_bytes();
308 let mut depth = 2; let mut i = start;
310
311 while i < bytes.len() && depth > 0 {
312 match bytes[i] {
313 b'(' => depth += 1,
314 b')' => depth -= 1,
315 b'"' => {
316 i += 1;
318 while i < bytes.len() && bytes[i] != b'"' {
319 if bytes[i] == b'\\' {
320 i += 1;
321 }
322 i += 1;
323 }
324 }
325 b'\'' => {
326 i += 1;
328 while i < bytes.len() && bytes[i] != b'\'' {
329 if bytes[i] == b'\\' {
330 i += 1;
331 }
332 i += 1;
333 }
334 }
335 _ => {}
336 }
337 i += 1;
338 }
339
340 if depth == 0 {
341 Some((i, code[start..i].to_string()))
342 } else {
343 None
344 }
345 }
346
347 fn find_matching_paren(code: &str, start: usize) -> Option<(usize, String)> {
349 let bytes = code.as_bytes();
350 if start >= bytes.len() || bytes[start] != b'(' {
351 return None;
352 }
353
354 let mut depth = 1;
355 let mut i = start + 1;
356
357 while i < bytes.len() && depth > 0 {
358 match bytes[i] {
359 b'(' => depth += 1,
360 b')' => depth -= 1,
361 b'"' => {
362 i += 1;
364 while i < bytes.len() && bytes[i] != b'"' {
365 if bytes[i] == b'\\' {
366 i += 1;
367 }
368 i += 1;
369 }
370 }
371 b'\'' => {
372 i += 1;
374 while i < bytes.len() && bytes[i] != b'\'' {
375 if bytes[i] == b'\\' {
376 i += 1;
377 }
378 i += 1;
379 }
380 }
381 _ => {}
382 }
383 i += 1;
384 }
385
386 if depth == 0 {
387 Some((i, code[start..i].to_string()))
388 } else {
389 None
390 }
391 }
392
393 fn find_statement_expr_end(code: &str, start: usize) -> Option<(usize, String)> {
395 let bytes = code.as_bytes();
396 if start + 1 >= bytes.len() || bytes[start] != b'(' || bytes[start + 1] != b'{' {
397 return None;
398 }
399
400 let mut paren_depth = 1;
401 let mut brace_depth = 1;
402 let mut i = start + 2;
403
404 while i < bytes.len() && (paren_depth > 0 || brace_depth > 0) {
405 match bytes[i] {
406 b'(' => paren_depth += 1,
407 b')' => paren_depth -= 1,
408 b'{' => brace_depth += 1,
409 b'}' => brace_depth -= 1,
410 b'"' => {
411 i += 1;
413 while i < bytes.len() && bytes[i] != b'"' {
414 if bytes[i] == b'\\' {
415 i += 1;
416 }
417 i += 1;
418 }
419 }
420 b'\'' => {
421 i += 1;
423 while i < bytes.len() && bytes[i] != b'\'' {
424 if bytes[i] == b'\\' {
425 i += 1;
426 }
427 i += 1;
428 }
429 }
430 _ => {}
431 }
432 i += 1;
433 }
434
435 if paren_depth == 0 && brace_depth == 0 {
436 Some((i, code[start..i].to_string()))
437 } else {
438 None
439 }
440 }
441}
442
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a fresh neutralizer over `source`.
    fn neutralize(source: &str) -> NeutralizedSource {
        GccNeutralizer::new().neutralize(source)
    }

    /// Collapses every whitespace run to a single space.
    ///
    /// Removing a construct keeps the whitespace on both sides of it (for
    /// example `void __attribute__((packed)) foo` becomes `void  foo`), so
    /// expectations about the surrounding code are compared after squeezing.
    fn squeeze(code: &str) -> String {
        code.split_whitespace().collect::<Vec<_>>().join(" ")
    }

    #[test]
    fn test_neutralize_attribute() {
        let result = neutralize("void __attribute__((packed)) foo(void) {}");

        assert!(!result.code.contains("__attribute__"));
        assert!(squeeze(&result.code).contains("void foo(void) {}"));
        assert!(!result.transformations.is_empty());
        assert_eq!(result.transformations[0].kind, TransformKind::Attribute);
    }

    #[test]
    fn test_neutralize_attribute_nested() {
        let result = neutralize("void __attribute__((unused, aligned(16))) bar(void) {}");

        assert!(!result.code.contains("__attribute__"));
        assert!(squeeze(&result.code).contains("void bar(void) {}"));
    }

    #[test]
    fn test_neutralize_extension() {
        let result = neutralize("__extension__ union { int x; float y; };");

        assert!(!result.code.contains("__extension__"));
        assert!(result.code.contains("union { int x; float y; };"));
    }

    #[test]
    fn test_neutralize_typeof() {
        let result = neutralize("typeof(foo) bar;");

        assert!(!result.code.contains("typeof"));
        assert!(result.code.contains("int bar;"));
    }

    #[test]
    fn test_neutralize_typeof_underscore() {
        let result = neutralize("__typeof__(x) y;");

        assert!(!result.code.contains("__typeof__"));
        assert!(result.code.contains("int y;"));
    }

    #[test]
    fn test_neutralize_asm() {
        let result = neutralize("void foo(void) { __asm__ volatile(\"nop\"); }");

        assert!(!result.code.contains("__asm__"));
        assert!(result.code.contains("{ 0 }"));
    }

    #[test]
    fn test_neutralize_restrict() {
        let result = neutralize("void foo(int * __restrict p) {}");

        assert!(!result.code.contains("__restrict"));
        assert!(result.code.contains("int * p"));
    }

    #[test]
    fn test_neutralize_inline() {
        let result = neutralize("__inline__ void foo(void) {}");

        assert!(!result.code.contains("__inline__"));
        assert!(result.code.contains("void foo(void)"));
    }

    #[test]
    fn test_neutralize_statement_expression() {
        let result = neutralize("int x = ({ int y = 5; y + 1; });");

        assert!(!result.code.contains("({"));
        assert!(result.code.contains("int x = (0);"));
    }

    #[test]
    fn test_neutralize_builtin_offsetof() {
        let result = neutralize("int off = __builtin_offsetof(struct foo, bar);");

        assert!(!result.code.contains("__builtin_offsetof"));
        assert!(result.code.contains("int off = 0;"));
    }

    #[test]
    fn test_neutralize_builtin_types_compatible() {
        let result = neutralize("int same = __builtin_types_compatible_p(int, long);");

        assert!(!result.code.contains("__builtin_types_compatible_p"));
        assert!(result.code.contains("int same = 0;"));
        assert_eq!(result.transformations.len(), 1);
        assert_eq!(result.transformations[0].kind, TransformKind::AlignofSizeof);
    }

    #[test]
    fn test_neutralize_multiple() {
        let source = r#"
__extension__ struct {
    __attribute__((packed)) int x;
} __attribute__((aligned(16)));
"#;
        let result = neutralize(source);

        assert!(!result.code.contains("__extension__"));
        assert!(!result.code.contains("__attribute__"));
    }

    #[test]
    fn test_transformation_tracking() {
        let result = neutralize("__attribute__((unused)) int x;");

        assert!(!result.transformations.is_empty());
        let trans = &result.transformations[0];
        assert_eq!(trans.kind, TransformKind::Attribute);
        assert!(trans.original_text.contains("__attribute__"));

        // The recorded span must describe exactly the text that was removed.
        assert_eq!(trans.original_text, "__attribute__((unused))");
        assert_eq!(trans.original_start, 0);
        assert_eq!(trans.original_length, trans.original_text.len());
        assert_eq!(trans.transformed_start, 0);
        assert_eq!(trans.transformed_length, 0);
    }

    #[test]
    fn test_no_false_positives() {
        let result = neutralize("int attribute_count; int typeof_var;");

        assert!(result.code.contains("attribute_count"));
        assert!(result.code.contains("typeof_var"));
    }

    #[test]
    fn test_string_literal_handling() {
        let result = neutralize("char *s = \"test\";");

        assert!(result.code.contains("\"test\""));
    }
}