sqc 0.4.13

Software Code Quality - CERT C compliance checker
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
// Common AST utilities for CERT C rules
// This module provides reusable functions for navigating and extracting information from the C AST

use tree_sitter::Node;

// ============================================================================
// Node Text Extraction
// ============================================================================

/// Borrow the slice of `source` that `node` spans.
///
/// The slice runs from the node's start byte offset to its end byte offset,
/// so `source` is expected to be the text the node was parsed from.
///
/// # Panics
/// Panics if the node's byte range lies outside `source` or does not fall on
/// UTF-8 character boundaries.
pub fn get_node_text<'a>(node: &Node, source: &'a str) -> &'a str {
    let (start, end) = (node.start_byte(), node.end_byte());
    &source[start..end]
}

/// Copy the slice of `source` that `node` spans into a new `String`.
///
/// # Panics
/// Panics if the node's byte range lies outside `source` or does not fall on
/// UTF-8 character boundaries.
pub fn get_node_text_owned(node: &Node, source: &str) -> String {
    let span = node.start_byte()..node.end_byte();
    String::from(&source[span])
}

// ============================================================================
// AST Navigation
// ============================================================================

/// Locate the nearest `function_definition` at or above `node`.
///
/// The search starts with `node` itself and then follows parent links, so a
/// `function_definition` node passed in directly is returned unchanged.
/// Returns `None` when the root is reached without finding one.
pub fn find_containing_function<'a>(node: &Node<'a>) -> Option<Node<'a>> {
    std::iter::successors(Some(*node), |ancestor| ancestor.parent())
        .find(|candidate| candidate.kind() == "function_definition")
}

/// Check if a node is inside a loop (for, while, or do-while)
pub fn is_inside_loop(node: &Node) -> bool {
    let mut current = node.parent();
    while let Some(parent) = current {
        match parent.kind() {
            "for_statement" | "while_statement" | "do_statement" => return true,
            "function_definition" => return false, // Stop at function boundary
            _ => current = parent.parent(),
        }
    }
    false
}

/// Check if a node is inside a conditional statement (if, else if, switch)
#[allow(dead_code)]
pub fn is_inside_conditional(node: &Node) -> bool {
    let mut current = node.parent();
    while let Some(parent) = current {
        match parent.kind() {
            "if_statement" | "switch_statement" => return true,
            "function_definition" => return false, // Stop at function boundary
            _ => current = parent.parent(),
        }
    }
    false
}

// ============================================================================
// Identifier Extraction from Declarators
// ============================================================================

/// Return the name declared by a declarator node, or an empty string.
///
/// An `identifier` node yields its own text. Pointer, array, function and
/// parenthesized declarators are searched child by child (recursively) and
/// the first identifier found wins. Any other node kind yields `""`.
///
/// Examples:
/// - int x           -> "x"
/// - int *ptr        -> "ptr"
/// - int arr[10]     -> "arr"
/// - int **ptr       -> "ptr"
/// - int (*fn)(int)  -> "fn"
pub fn get_identifier_from_declarator(declarator: &Node, source: &str) -> String {
    let kind = declarator.kind();
    if kind == "identifier" {
        return get_node_text_owned(declarator, source);
    }

    let is_wrapper = matches!(
        kind,
        "pointer_declarator"
            | "array_declarator"
            | "function_declarator"
            | "parenthesized_declarator"
    );
    if !is_wrapper {
        // Unsupported kinds produce the empty-string sentinel.
        return String::new();
    }

    (0..declarator.child_count())
        .filter_map(|index| declarator.child(index))
        .find_map(|child| {
            if child.kind() == "identifier" {
                // A direct identifier child is returned as-is.
                return Some(get_node_text_owned(&child, source));
            }
            let nested = get_identifier_from_declarator(&child, source);
            (!nested.is_empty()).then_some(nested)
        })
        .unwrap_or_default()
}

/// Find the identifier declared by a declarator node.
///
/// This is the `Option`-returning counterpart of
/// `get_identifier_from_declarator`: `None` means no identifier was found.
///
/// * If `declarator` is itself an `identifier`, its text is returned.
/// * Otherwise the children are scanned in order; a direct `identifier`
///   child is returned, and array / pointer / function / parenthesized
///   declarator children are searched recursively.
pub fn find_identifier_in_declarator(declarator: &Node, source: &str) -> Option<String> {
    // A bare identifier is the simplest declarator (`int x`); without this
    // check it has no children to scan and would report `None`.
    if declarator.kind() == "identifier" {
        return Some(get_node_text_owned(declarator, source));
    }

    (0..declarator.child_count())
        .filter_map(|index| declarator.child(index))
        .find_map(|child| match child.kind() {
            "identifier" => Some(get_node_text_owned(&child, source)),
            "array_declarator"
            | "pointer_declarator"
            | "function_declarator"
            | "parenthesized_declarator" => find_identifier_in_declarator(&child, source),
            _ => None,
        })
}

// ============================================================================
// Function Parameter Extraction
// ============================================================================

/// Extract function parameters as `(name, full_type)` tuples.
///
/// `function_node` is searched for its `function_declarator`. The search
/// looks at direct children first and also descends through
/// `pointer_declarator` / `parenthesized_declarator` wrappers, so functions
/// that return a pointer (`char *f(int a)`) are handled as well.
///
/// Returns `None` if no function declarator is found or if it yields no
/// named parameters.
pub fn get_function_parameters(
    function_node: &Node,
    source: &str,
) -> Option<Vec<(String, String)>> {
    // First `function_declarator` reachable through pointer/parenthesized
    // declarator wrappers, in child order.
    fn locate_function_declarator<'t>(node: &Node<'t>) -> Option<Node<'t>> {
        (0..node.child_count())
            .filter_map(|index| node.child(index))
            .find_map(|child| match child.kind() {
                "function_declarator" => Some(child),
                "pointer_declarator" | "parenthesized_declarator" => {
                    locate_function_declarator(&child)
                }
                _ => None,
            })
    }

    let declarator = locate_function_declarator(function_node)?;
    extract_parameters(&declarator, source)
}

/// Extract parameters from a function declarator node
fn extract_parameters(declarator_node: &Node, source: &str) -> Option<Vec<(String, String)>> {
    let mut parameters = Vec::new();

    // Find parameter_list node
    for i in 0..declarator_node.child_count() {
        if let Some(child) = declarator_node.child(i) {
            if child.kind() == "parameter_list" {
                // Extract each parameter
                for j in 0..child.child_count() {
                    if let Some(param) = child.child(j) {
                        if param.kind() == "parameter_declaration" {
                            if let Some((name, param_type)) = extract_parameter_info(&param, source)
                            {
                                parameters.push((name, param_type));
                            }
                        }
                    }
                }
            }
        }
    }

    if parameters.is_empty() {
        None
    } else {
        Some(parameters)
    }
}

/// Derive `(name, full_declaration_text)` from a `parameter_declaration`.
///
/// The children are scanned in order. An array, pointer, or function
/// declarator contributes the identifier nested inside it; a plain
/// `identifier` child contributes its own text. The second tuple element is
/// always the source text of the whole parameter declaration. Returns `None`
/// if no child yields a name.
fn extract_parameter_info(param_node: &Node, source: &str) -> Option<(String, String)> {
    let declaration_text = get_node_text(param_node, source);

    (0..param_node.child_count())
        .filter_map(|index| param_node.child(index))
        .find_map(|child| {
            let name = match child.kind() {
                // Array, pointer, or function-pointer parameter: dig out the name.
                // A declarator without an identifier is skipped, not fatal.
                "array_declarator" | "pointer_declarator" | "function_declarator" => {
                    find_identifier_in_declarator(&child, source)?
                }
                // Simple parameter such as `int x`.
                "identifier" => get_node_text(&child, source).to_string(),
                _ => return None,
            };
            Some((name, declaration_text.to_string()))
        })
}

/// Check if a variable name appears in the function's parameter list
pub fn is_function_parameter(function_node: &Node, var_name: &str, source: &str) -> bool {
    // Find parameter list in function
    for i in 0..function_node.child_count() {
        if let Some(child) = function_node.child(i) {
            if child.kind() == "function_declarator" {
                for j in 0..child.child_count() {
                    if let Some(param_list) = child.child(j) {
                        if param_list.kind() == "parameter_list" {
                            let param_text = get_node_text(&param_list, source);
                            // Check for word boundaries to avoid substring matches
                            let words: Vec<&str> = param_text
                                .split(|c: char| !c.is_alphanumeric() && c != '_')
                                .collect();
                            if words.contains(&var_name) {
                                return true;
                            }
                        }
                    }
                }
            }
        }
    }
    false
}

// ============================================================================
// Type Checking Utilities
// ============================================================================

/// Check if a parameter type string indicates an array parameter.
///
/// A type is treated as array-like when it contains `[`, or when it contains
/// `*` and is not a `const char` pointer (those are taken to be strings).
///
/// The `const char` exclusion ignores whitespace layout, so `const char *s`,
/// `const char* s` and `const char  *s` are all recognised.
pub fn is_array_parameter_type(param_type: &str) -> bool {
    if param_type.contains('[') {
        return true;
    }
    if !param_type.contains('*') {
        return false;
    }

    // Collapse whitespace runs and glue `*` to the preceding token so the
    // check does not depend on how the author spaced the declaration.
    let normalized = param_type
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .replace(" *", "*");
    !normalized.contains("const char*")
}

/// Report whether a type string denotes a pointer, i.e. whether it contains
/// a `*` anywhere.
pub fn is_pointer_type(type_str: &str) -> bool {
    type_str.chars().any(|c| c == '*')
}

/// Check if a type string represents a signed integer type.
///
/// The string is split on whitespace and `const` / `volatile` qualifiers are
/// ignored. The remaining words are accepted when either
/// * they are a single well-known signed typedef (`int32_t`, `ssize_t`, ...), or
/// * every word is one of `signed`, `int`, `short`, `long`, `char`
///   (covering spellings such as `long long`, `long int`, `signed short int`).
///
/// Plain `char` is treated as signed. Anything containing `unsigned`, a
/// pointer star, or an unknown word is rejected, as is an empty string.
/// The check is purely lexical and does not validate that the combination of
/// words is legal C.
#[allow(dead_code)]
pub fn is_signed_type(type_str: &str) -> bool {
    const SIGNED_TYPEDEFS: [&str; 8] = [
        "int8_t",
        "int16_t",
        "int32_t",
        "int64_t",
        "intptr_t",
        "intmax_t",
        "ptrdiff_t",
        "ssize_t",
    ];

    let words: Vec<&str> = type_str
        .split_whitespace()
        .filter(|word| !matches!(*word, "const" | "volatile"))
        .collect();

    match words.as_slice() {
        [] => false,
        [single] if SIGNED_TYPEDEFS.contains(single) => true,
        specifiers => specifiers
            .iter()
            .all(|word| matches!(*word, "signed" | "int" | "short" | "long" | "char")),
    }
}

/// Report whether a type string names an unsigned integer type.
///
/// Any string containing the word `unsigned` qualifies; otherwise the trimmed
/// string must be one of the well-known unsigned typedefs.
#[allow(dead_code)]
pub fn is_unsigned_type(type_str: &str) -> bool {
    const UNSIGNED_TYPEDEFS: [&str; 7] = [
        "size_t",
        "uint8_t",
        "uint16_t",
        "uint32_t",
        "uint64_t",
        "uintptr_t",
        "uintmax_t",
    ];

    if type_str.contains("unsigned") {
        return true;
    }
    UNSIGNED_TYPEDEFS.contains(&type_str.trim())
}

// ============================================================================
// Operator Extraction
// ============================================================================

/// Extract the operator from a binary expression node
pub fn get_binary_operator<'a>(node: &Node, source: &'a str) -> Option<&'a str> {
    // The operator is usually a child of the binary expression
    for i in 0..node.child_count() {
        if let Some(child) = node.child(i) {
            let kind = child.kind();
            // Check if this is an operator token
            if matches!(
                kind,
                "+" | "-"
                    | "*"
                    | "/"
                    | "%"
                    | "=="
                    | "!="
                    | "<"
                    | ">"
                    | "<="
                    | ">="
                    | "&&"
                    | "||"
                    | "&"
                    | "|"
                    | "^"
                    | "<<"
                    | ">>"
                    | "="
                    | "+="
                    | "-="
                    | "*="
                    | "/="
                    | "%="
                    | "&="
                    | "|="
                    | "^="
                    | "<<="
                    | ">>="
            ) {
                return Some(get_node_text(&child, source));
            }
        }
    }
    None
}

// ============================================================================
// Array Size Extraction
// ============================================================================

/// Find the constant size of `array_name` from the text preceding its use.
///
/// Looks for the last occurrence of `array_name[` that starts on an
/// identifier boundary (so `arr` does not match inside `myarr[...]`) and
/// interprets the text up to the next `]` as the size.
///
/// Supported size forms:
/// - a plain unsigned integer: `arr[10]`
/// - a product or sum of two unsigned integers: `arr[2*3]`, `arr[10+5]`
///
/// Returns `None` if `array_name` is empty, no such occurrence exists, the
/// bracket is not closed, the size is not one of the supported forms, or the
/// arithmetic would overflow `usize`.
#[allow(dead_code)]
pub fn find_array_size(array_name: &str, preceding_text: &str) -> Option<usize> {
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    // Evaluate `<int> op <int>` with overflow checking.
    fn eval_binary(
        expr: &str,
        op: char,
        apply: fn(usize, usize) -> Option<usize>,
    ) -> Option<usize> {
        let (lhs, rhs) = expr.split_once(op)?;
        let lhs = lhs.trim().parse::<usize>().ok()?;
        let rhs = rhs.trim().parse::<usize>().ok()?;
        apply(lhs, rhs)
    }

    // An empty name would match every `[` in the text.
    if array_name.is_empty() {
        return None;
    }

    let pattern = format!("{}[", array_name);

    // Last occurrence that is not the tail of a longer identifier.
    let (pos, _) = preceding_text
        .rmatch_indices(pattern.as_str())
        .find(|&(idx, _)| !preceding_text[..idx].ends_with(is_ident_char))?;

    // Extract the size between `[` and `]`.
    let after_bracket = &preceding_text[pos + pattern.len()..];
    let close_bracket = after_bracket.find(']')?;
    let size_str = after_bracket[..close_bracket].trim();

    if let Ok(size) = size_str.parse::<usize>() {
        return Some(size);
    }

    // Simple two-operand arithmetic such as `2*3` or `10+5`.
    eval_binary(size_str, '*', usize::checked_mul)
        .or_else(|| eval_binary(size_str, '+', usize::checked_add))
}

/// Get the size of a C type in bytes.
///
/// This is a best-effort approximation for common type spellings, assuming a
/// 64-bit target where `long`, `size_t` and pointers are 8 bytes wide. The
/// pointer-sized and maximum-width integer typedefs (`ssize_t`, `intptr_t`,
/// `uintptr_t`, `intmax_t`, `uintmax_t`) are sized as 8 for consistency with
/// `size_t` / `ptrdiff_t`, and `bool` / `_Bool` as 1.
///
/// Any type string ending in `*` is sized as a pointer. Unrecognised types
/// fall back to the size of `int` (4).
#[allow(dead_code)]
pub fn get_type_size(type_name: &str) -> usize {
    match type_name.trim() {
        "char" | "signed char" | "unsigned char" | "int8_t" | "uint8_t" | "bool" | "_Bool" => 1,
        "short" | "signed short" | "unsigned short" | "int16_t" | "uint16_t" => 2,
        "int" | "signed int" | "unsigned int" | "int32_t" | "uint32_t" | "float" => 4,
        "long" | "signed long" | "unsigned long" | "long long" | "signed long long"
        | "unsigned long long" | "int64_t" | "uint64_t" | "double" | "size_t" | "ssize_t"
        | "ptrdiff_t" | "intptr_t" | "uintptr_t" | "intmax_t" | "uintmax_t" => 8,
        "long double" => 16,
        t if t.ends_with('*') => 8, // Pointer size on 64-bit
        _ => 4,                     // Default to int size
    }
}

// ============================================================================
// Context Analysis
// ============================================================================

/// Check if a subscript expression is the target of an assignment (write context).
///
/// Starting at `node`, the walk climbs through enclosing
/// `subscript_expression` nodes for as long as the current node is the
/// *array operand* (`argument` field) of its parent. This handles nested
/// subscripts like `matrix[i][j] = value`. When an `assignment_expression`
/// is reached, the result is whether the current node is its `left` child.
///
/// A node that sits in the *index* position of a subscript is only read, so
/// `b[i]` in `a[b[i]] = value` is not a write context. Any other kind of
/// parent, or running out of parents, also yields `false`.
pub fn is_write_context(node: &Node) -> bool {
    let mut current = *node;

    while let Some(parent) = current.parent() {
        match parent.kind() {
            "assignment_expression" => {
                // Written only if we arrived from the left-hand side.
                return parent
                    .child_by_field_name("left")
                    .is_some_and(|left| left.id() == current.id());
            }
            "subscript_expression" => {
                // Keep climbing only through the array operand; an index
                // operand is evaluated for its value, never assigned to.
                let is_array_operand = parent
                    .child_by_field_name("argument")
                    .is_some_and(|argument| argument.id() == current.id());
                if !is_array_operand {
                    return false;
                }
                current = parent;
            }
            // Hit a different node type, not a write context.
            _ => return false,
        }
    }

    // No parent, not a write context.
    false
}

/// Check if a node is part of a sizeof expression
#[allow(dead_code)]
pub fn is_in_sizeof(node: &Node) -> bool {
    let mut current = node.parent();
    while let Some(parent) = current {
        if parent.kind() == "sizeof_expression" {
            return true;
        }
        if parent.kind() == "function_definition" {
            return false;
        }
        current = parent.parent();
    }
    false
}

// ============================================================================
// Control Flow Navigation Utilities
// ============================================================================

/// Locate the nearest `for_statement` that encloses `node`.
///
/// # Arguments
/// * `node` - The node whose ancestors are searched (the node itself is not
///   considered)
///
/// # Returns
/// The closest ancestor of kind `for_statement`, or `None` if the root is
/// reached without finding one
///
/// # Examples
/// ```no_run
/// use sqc::utility::cert_c::ast_utils::find_containing_for_loop;
/// use tree_sitter::Node;
/// // When checking a subscript inside a for loop:
/// // let subscript_node: Node = /* get from parsed AST */;
/// // if let Some(for_loop) = find_containing_for_loop(&subscript_node) {
/// //     // Analyze loop bounds
/// // }
/// ```
pub fn find_containing_for_loop<'a>(node: &Node<'a>) -> Option<Node<'a>> {
    let mut ancestor = node.parent()?;
    loop {
        if ancestor.kind() == "for_statement" {
            return Some(ancestor);
        }
        // `?` ends the search with `None` once the root has been passed.
        ancestor = ancestor.parent()?;
    }
}

/// Find the containing if statement for a given node
///
/// # Arguments
/// * `node` - The starting node to search from
///
/// # Returns
/// The if_statement node that contains the given node, or None if not found
///
/// # Examples
/// ```no_run
/// use sqc::utility::cert_c::ast_utils::find_containing_if_statement;
/// use tree_sitter::Node;
/// // When checking if array access is within a bounds check:
/// // let subscript_node: Node = /* get from parsed AST */;
/// // if let Some(if_stmt) = find_containing_if_statement(&subscript_node) {
/// //     // Check if condition validates bounds
/// // }
/// ```
pub fn find_containing_if_statement<'a>(node: &Node<'a>) -> Option<Node<'a>> {
    let mut current = node.parent();
    while let Some(n) = current {
        if n.kind() == "if_statement" {
            return Some(n);
        }
        current = n.parent();
    }
    None
}

// ============================================================================
// Struct Type Resolution
// ============================================================================

/// Extract the struct name from a C type string.
///
/// Pointer stars and whole-word `const` / `volatile` qualifiers are peeled
/// from both ends before the name is inspected. Handles patterns like:
/// - `"struct MyStruct *"` → `Some("MyStruct")`
/// - `"struct MyStruct"` → `Some("MyStruct")`
/// - `"const struct MyStruct * const"` → `Some("MyStruct")`
/// - `"MyStruct *"` → `Some("MyStruct")`
/// - `"MyStruct"` → `Some("MyStruct")`
/// - `"int"` → `None` (primitive type, not a struct)
///
/// Qualifiers are only removed when they are separate words, so a name that
/// merely ends in `const` or `volatile` (e.g. `struct myconst`) is kept
/// intact.
///
/// For typedef'd structs (e.g., `typedef struct Foo { ... } Foo;`), the
/// type_map entry may be just `"Foo *"` without the `struct` keyword, so a
/// bare identifier that is not a known primitive is returned as-is.
pub fn extract_struct_name_from_type(type_str: &str) -> Option<&str> {
    const QUALIFIERS: [&str; 2] = ["const", "volatile"];

    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    let mut base = type_str.trim();

    // Peel trailing `*` and qualifier words, in any order and any number.
    loop {
        let peeled = match base.strip_suffix('*') {
            Some(rest) => rest,
            None => QUALIFIERS
                .iter()
                .find_map(|&qualifier| {
                    base.strip_suffix(qualifier)
                        // Reject when the qualifier is the tail of a longer identifier.
                        .filter(|rest| !rest.ends_with(is_ident_char))
                })
                .unwrap_or(base),
        };
        let peeled = peeled.trim_end();
        if peeled.len() == base.len() {
            break;
        }
        base = peeled;
    }

    // Peel leading qualifier words.
    loop {
        let stripped = QUALIFIERS.iter().find_map(|&qualifier| {
            base.strip_prefix(qualifier)
                // Must be followed by whitespace to count as a separate word.
                .filter(|rest| rest.starts_with(char::is_whitespace))
        });
        match stripped {
            Some(rest) => base = rest.trim_start(),
            None => break,
        }
    }

    // Skip obvious primitives
    if matches!(
        base,
        "int"
            | "unsigned int"
            | "signed int"
            | "short"
            | "unsigned short"
            | "long"
            | "unsigned long"
            | "long long"
            | "unsigned long long"
            | "char"
            | "unsigned char"
            | "signed char"
            | "float"
            | "double"
            | "void"
            | "_Bool"
    ) {
        return None;
    }
    // Skip stdint types (heuristic: `int*_t`, `uint*_t`, `size*_t`)
    if base.ends_with("_t")
        && (base.starts_with("int") || base.starts_with("uint") || base.starts_with("size"))
    {
        return None;
    }

    // "struct MyStruct" → "MyStruct"
    if let Some(name) = base.strip_prefix("struct ") {
        let name = name.trim();
        if !name.is_empty() && name.chars().all(is_ident_char) {
            return Some(name);
        }
        return None;
    }

    // Bare identifier (typedef'd name) — must look like an identifier, not a primitive
    let starts_like_identifier = base
        .chars()
        .next()
        .is_some_and(|c| c.is_alphabetic() || c == '_');
    if starts_like_identifier && base.chars().all(is_ident_char) {
        return Some(base);
    }

    None
}

/// Resolve the type of a `field_expression` node using the variable type map
/// and struct field type database.
///
/// Given `s->count` where `s` is declared as `struct MyStruct *s`:
/// 1. Extracts field name "count" from the field_expression
/// 2. Looks up base variable "s" → "struct MyStruct *" in type_map
/// 3. Extracts struct name "MyStruct"
/// 4. Looks up "MyStruct"."count" → "int" in struct_field_types
///
/// The accessed object (the `argument` field) may be:
/// - an identifier (`s.count`, `s->count`),
/// - another field expression (`a.b.c`, `a->b.c`), resolved recursively,
/// - a pointer expression over a name found in `type_map`, in which case one
///   pointer level is removed from that name's type,
/// - any of the above wrapped in parentheses, e.g. `(*p).count` or `(s).count`.
///
/// Returns `None` if any lookup fails or the argument has another shape.
pub fn resolve_field_expression_type(
    node: &Node,
    source: &str,
    type_map: &std::collections::HashMap<String, String>,
    struct_field_types: &std::collections::HashMap<
        String,
        std::collections::HashMap<String, String>,
    >,
) -> Option<String> {
    let bytes = source.as_bytes();
    let field_name = node.child_by_field_name("field")?.utf8_text(bytes).ok()?;

    // Because `.` / `->` bind tighter than unary `*`, a dereferenced base is
    // always written with parentheses (`(*p).field`); unwrap them so the
    // inner expression can be classified below.
    let mut argument = node.child_by_field_name("argument")?;
    while argument.kind() == "parenthesized_expression" {
        argument = argument.named_child(0)?;
    }

    // Resolve the struct type of the argument. Supports chained access
    // (`a.b.c`, `a->b.c`) by recursing through nested field_expressions.
    let base_type = match argument.kind() {
        "identifier" => {
            let base_name = argument.utf8_text(bytes).ok()?;
            type_map.get(base_name)?.clone()
        }
        "field_expression" => {
            resolve_field_expression_type(&argument, source, type_map, struct_field_types)?
        }
        "pointer_expression" => {
            // `(*p).field` — dereference one pointer level from `p`'s type.
            let inner = argument.child_by_field_name("argument")?;
            let inner_name = inner.utf8_text(bytes).ok()?;
            let pointer_type = type_map.get(inner_name)?;
            pointer_type
                .strip_suffix(" *")
                .or_else(|| pointer_type.strip_suffix('*'))
                .map(|pointee| pointee.trim().to_string())?
        }
        _ => return None,
    };

    let struct_name = extract_struct_name_from_type(&base_type)?;

    struct_field_types
        .get(struct_name)
        .and_then(|fields| fields.get(field_name))
        .cloned()
}

// Unit tests for the AST utilities. Tests that need a syntax tree parse a
// small C snippet with the tree-sitter C grammar; the string-based helpers
// are exercised directly.
#[cfg(test)]
mod tests {
    use super::*;
    use tree_sitter::Parser;

    /// Parse `code` with the C grammar and return the tree together with an
    /// owned copy of the source text.
    fn parse_c_code(code: &str) -> (tree_sitter::Tree, String) {
        let mut parser = Parser::new();
        let language = tree_sitter_c::language();
        parser.set_language(&language).unwrap();
        let tree = parser.parse(code, None).unwrap();
        (tree, code.to_string())
    }

    /// The root node's text is the whole source.
    #[test]
    fn test_get_node_text() {
        let (tree, source) = parse_c_code("int x = 5;");
        let root = tree.root_node();
        let text = get_node_text(&root, &source);
        assert_eq!(text, "int x = 5;");
    }

    /// A node inside a function body resolves to its function definition.
    #[test]
    fn test_find_containing_function() {
        let (tree, _source) = parse_c_code("void foo() { int x = 5; }");
        let root = tree.root_node();

        // The first top-level node is the function definition
        let func_def = root.child(0).unwrap();
        assert_eq!(func_def.kind(), "function_definition");

        // Find a node inside the function
        let compound_stmt = func_def.child_by_field_name("body").unwrap();
        let decl = compound_stmt.child(1).unwrap(); // Skip opening brace

        let containing_func = find_containing_function(&decl);
        assert!(containing_func.is_some());
        assert_eq!(containing_func.unwrap().kind(), "function_definition");
    }

    /// A literal array size is read from the declaration text.
    #[test]
    fn test_find_array_size() {
        let text = "int main() { int arr[10]; }";
        let size = find_array_size("arr", text);
        assert_eq!(size, Some(10));
    }

    /// Signed spellings are accepted; unsigned ones are rejected.
    #[test]
    fn test_is_signed_type() {
        assert!(is_signed_type("int"));
        assert!(is_signed_type("signed int"));
        assert!(is_signed_type("int32_t"));
        assert!(!is_signed_type("unsigned int"));
        assert!(!is_signed_type("size_t"));
    }

    /// Unsigned spellings are accepted; signed ones are rejected.
    #[test]
    fn test_is_unsigned_type() {
        assert!(is_unsigned_type("unsigned int"));
        assert!(is_unsigned_type("size_t"));
        assert!(is_unsigned_type("uint32_t"));
        assert!(!is_unsigned_type("int"));
        assert!(!is_unsigned_type("signed int"));
    }

    /// Sizes for a few common types, including a pointer.
    #[test]
    fn test_get_type_size() {
        assert_eq!(get_type_size("char"), 1);
        assert_eq!(get_type_size("short"), 2);
        assert_eq!(get_type_size("int"), 4);
        assert_eq!(get_type_size("long"), 8);
        assert_eq!(get_type_size("int *"), 8);
    }
}