rusty_cpp/parser/
external_annotations.rs

1// External annotations parser - handles safety and lifetime annotations
2// for third-party functions that can't be modified
3//
4// External functions can be marked as:
5// - [safe] - programmer has audited the function and confirmed it follows safety rules
6//           (e.g., std::string::length() is safe - no UB, no raw pointers exposed)
7// - [unsafe] - function may have unsafe behavior, must be called from @unsafe block
8//
9// NOTE: The distinction is about programmer audit, not tool verification.
10// [safe] external functions can be called directly from @safe code.
11
12use std::collections::HashMap;
13use std::path::Path;
14use std::fs;
15use regex::Regex;
16
/// Safety level assigned to an external (third-party) function.
///
/// The level records the outcome of a programmer audit, not of tool
/// verification:
/// - `Safe`: the programmer has audited the function and confirmed that it
///   follows the safety rules. It can be called directly from `@safe` code
///   without an `@unsafe` block.
/// - `Unsafe`: the function may have unsafe behavior and must be called from
///   an `@unsafe` context.
///
/// The enum is a payload-free tag, so it is `Copy` and can be compared,
/// hashed, and passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExternalSafety {
    /// Programmer audited; safe to call from `@safe` code.
    Safe,
    /// Must be called from an `@unsafe` block.
    Unsafe,
}
26
27#[derive(Debug, Clone)]
28pub struct ExternalFunctionAnnotation {
29    pub name: String,
30    pub safety: ExternalSafety,
31    pub lifetime_spec: Option<String>, // Raw lifetime specification for future use
32    pub param_lifetimes: Vec<String>,  // Parameter lifetime annotations
33    pub return_lifetime: Option<String>, // Return type lifetime
34    pub lifetime_constraints: Vec<String>, // Where clauses
35}
36
/// Named group of safe/unsafe name patterns, typically one per library.
#[derive(Debug, Clone)]
pub struct ExternalProfile {
    /// Profile name used as the lookup key.
    pub name: String,
    /// Patterns for functions the profile treats as safe.
    pub safe_patterns: Vec<String>,
    /// Patterns for functions the profile treats as unsafe.
    pub unsafe_patterns: Vec<String>,
}
43
44#[derive(Debug, Clone)]
45pub struct ExternalAnnotations {
46    // Explicit function annotations
47    pub functions: HashMap<String, ExternalFunctionAnnotation>,
48
49    // Pattern-based whitelists and blacklists
50    pub whitelist_patterns: Vec<String>,
51    pub blacklist_patterns: Vec<String>,
52
53    // Named profiles for different libraries
54    pub profiles: HashMap<String, ExternalProfile>,
55
56    // Currently active profile
57    pub active_profile: Option<String>,
58
59    // Unsafe scopes (classes/namespaces marked as entirely unsafe)
60    pub unsafe_scopes: Vec<String>,
61
62    // Unsafe types - types whose internal structure should not be analyzed
63    // A @safe class can have unsafe_type fields without triggering internal analysis
64    pub unsafe_types: Vec<String>,
65}
66
67impl ExternalAnnotations {
68    pub fn new() -> Self {
69        let mut annotations = ExternalAnnotations {
70            functions: HashMap::new(),
71            whitelist_patterns: Vec::new(),
72            blacklist_patterns: Vec::new(),
73            profiles: HashMap::new(),
74            active_profile: None,
75            unsafe_scopes: Vec::new(),
76            unsafe_types: Vec::new(),
77        };
78
79        // Load default annotations
80        annotations.load_defaults();
81        annotations
82    }
83    
84    pub fn from_file(path: &Path) -> Result<Self, String> {
85        let content = fs::read_to_string(path)
86            .map_err(|e| format!("Failed to read external annotations: {}", e))?;
87        
88        let mut annotations = Self::new();
89        annotations.parse_content(&content)?;
90        Ok(annotations)
91    }
92    
93    fn extract_from_comments(&self, content: &str) -> String {
94        let mut result = String::new();
95        let mut in_comment_block = false;
96        let mut comment_content = String::new();
97        
98        for line in content.lines() {
99            let trimmed = line.trim();
100            
101            // Check for C++ comment with @external
102            if trimmed.starts_with("//") {
103                let comment = &trimmed[2..].trim();
104                if comment.starts_with("@external:") || in_comment_block {
105                    // Remove the // prefix and add to result
106                    result.push_str(comment);
107                    result.push('\n');
108                    
109                    // Track if we're in a multi-line block
110                    if comment.contains('{') {
111                        in_comment_block = true;
112                    }
113                    if comment.contains('}') {
114                        in_comment_block = false;
115                    }
116                }
117            }
118            // Also handle C-style comments
119            else if trimmed.starts_with("/*") {
120                // Extract content between /* and */
121                if let Some(end) = trimmed.find("*/") {
122                    let comment = &trimmed[2..end].trim();
123                    if comment.starts_with("@external:") {
124                        result.push_str(comment);
125                        result.push('\n');
126                    }
127                }
128            }
129            // If not in comment, still include non-comment @external blocks
130            else if !in_comment_block && trimmed.starts_with("@external:") {
131                result.push_str(line);
132                result.push('\n');
133                if trimmed.contains('{') {
134                    in_comment_block = true;
135                }
136            }
137            else if in_comment_block && !trimmed.starts_with("//") {
138                // We've left the comment block
139                in_comment_block = false;
140            }
141        }
142        
143        // If no annotations found in comments, return original content
144        if result.is_empty() {
145            content.to_string()
146        } else {
147            result
148        }
149    }
150    
151    pub fn parse_content(&mut self, content: &str) -> Result<(), String> {
152        // First, try to extract annotations from C++ comments
153        let processed_content = self.extract_from_comments(content);
154        
155        // Parse unified @external blocks (primary syntax)
156        self.parse_unified_blocks(&processed_content)?;
157        
158        // Parse @external_function blocks (detailed syntax)
159        self.parse_external_function_blocks(&processed_content)?;
160        
161        // Parse @external_unsafe for classes/namespaces
162        self.parse_unsafe_scopes(&processed_content)?;
163        
164        // Parse @external_whitelist
165        self.parse_whitelist(&processed_content)?;
166        
167        // Parse @external_blacklist
168        self.parse_blacklist(&processed_content)?;
169        
170        // Parse @external_profile blocks
171        self.parse_profiles(&processed_content)?;
172        
173        Ok(())
174    }
175    
176    fn parse_unified_blocks(&mut self, content: &str) -> Result<(), String> {
177        // Parse @external: { func: [safety, lifetime] } blocks
178        let unified_re = Regex::new(r"@external:\s*\{([^}]+)\}").unwrap();
179        
180        for cap in unified_re.captures_iter(content) {
181            if let Some(block) = cap.get(1) {
182                self.parse_unified_entries(block.as_str())?;
183            }
184        }
185        
186        Ok(())
187    }
188    
189    fn parse_unified_entries(&mut self, block: &str) -> Result<(), String> {
190        // First, split block into individual entries
191        // Entries can be on separate lines OR separated by commas on the same line
192        // But we need to be careful not to split inside brackets [...]
193        let entries = self.split_entries(block);
194
195        for entry in entries {
196            let entry = entry.trim();
197            if entry.is_empty() || entry.starts_with("//") {
198                continue;
199            }
200
201            // Parse entries like: function_name: [safety, lifetime_spec]
202            // or: type_name: [unsafe_type]
203            // Note: function names can contain :: (e.g., rusty::Option::is_none)
204            // So we look for ": [" which is the separator between name and spec
205            if let Some(sep_pos) = entry.find(": [") {
206                let name = entry[..sep_pos].trim().to_string();
207                let spec_str = entry[sep_pos + 2..].trim();  // Skip ": " to get "[...]"
208
209                // Parse [safety, lifetime] or [unsafe_type] array
210                if spec_str.starts_with('[') && spec_str.ends_with(']') {
211                    let inner = &spec_str[1..spec_str.len()-1];
212                    let parts: Vec<&str> = inner.split(',').map(|s| s.trim()).collect();
213
214                    if parts.len() >= 1 {
215                        // Check for unsafe_type annotation
216                        if parts[0] == "unsafe_type" {
217                            self.unsafe_types.push(name);
218                            continue;
219                        }
220
221                        let safety = match parts[0] {
222                            "safe" => ExternalSafety::Safe,
223                            "unsafe" => ExternalSafety::Unsafe,
224                            _ => continue,
225                        };
226
227                        let lifetime_spec = if parts.len() >= 2 {
228                            Some(parts[1..].join(","))
229                        } else {
230                            None
231                        };
232
233                        let (param_lifetimes, return_lifetime, constraints) =
234                            if let Some(ref spec) = lifetime_spec {
235                                self.parse_lifetime_specification(spec)
236                            } else {
237                                (Vec::new(), None, Vec::new())
238                            };
239
240
241                        self.functions.insert(name.clone(), ExternalFunctionAnnotation {
242                            name,
243                            safety,
244                            lifetime_spec,
245                            param_lifetimes,
246                            return_lifetime,
247                            lifetime_constraints: constraints,
248                        });
249                    }
250                }
251            }
252        }
253
254        Ok(())
255    }
256
257    /// Split a block into individual entries, handling:
258    /// - Entries on separate lines
259    /// - Entries separated by commas on the same line
260    /// - Not splitting inside brackets [...]
261    fn split_entries(&self, block: &str) -> Vec<String> {
262        let mut entries = Vec::new();
263        let mut current_entry = String::new();
264        let mut bracket_depth = 0;
265
266        for ch in block.chars() {
267            match ch {
268                '[' => {
269                    bracket_depth += 1;
270                    current_entry.push(ch);
271                }
272                ']' => {
273                    bracket_depth -= 1;
274                    current_entry.push(ch);
275                    // If we've closed a bracket and we're at depth 0, this entry might be complete
276                    if bracket_depth == 0 {
277                        // Check if next non-whitespace char is comma or newline
278                        // For now, just mark that we've completed a bracketed section
279                    }
280                }
281                ',' if bracket_depth == 0 => {
282                    // Entry separator (outside brackets)
283                    let trimmed = current_entry.trim();
284                    if !trimmed.is_empty() {
285                        entries.push(trimmed.to_string());
286                    }
287                    current_entry.clear();
288                }
289                '\n' => {
290                    // Newline can also be an entry separator
291                    let trimmed = current_entry.trim();
292                    if !trimmed.is_empty() && trimmed.contains(": [") && trimmed.contains(']') {
293                        entries.push(trimmed.to_string());
294                        current_entry.clear();
295                    } else {
296                        // Continue building current entry (might be multi-line)
297                        current_entry.push(' ');
298                    }
299                }
300                _ => {
301                    current_entry.push(ch);
302                }
303            }
304        }
305
306        // Don't forget the last entry
307        let trimmed = current_entry.trim();
308        if !trimmed.is_empty() {
309            entries.push(trimmed.to_string());
310        }
311
312        entries
313    }
314
315    fn parse_external_function_blocks(&mut self, content: &str) -> Result<(), String> {
316        // Parse @external_function: name { safety: ..., lifetime: ..., where: ... }
317        let func_re = Regex::new(r"@external_function:\s*(\w+)\s*\{([^}]+)\}").unwrap();
318
319        for cap in func_re.captures_iter(content) {
320            if let (Some(name), Some(block)) = (cap.get(1), cap.get(2)) {
321                let func_name = name.as_str().to_string();
322                let block_content = block.as_str();
323
324                // Parse safety field - safe or unsafe
325                let safety = if block_content.contains("safety: safe") {
326                    ExternalSafety::Safe
327                } else if block_content.contains("safety: unsafe") {
328                    ExternalSafety::Unsafe
329                } else {
330                    // Default to unsafe (conservative choice for external code)
331                    ExternalSafety::Unsafe
332                };
333                
334                // Parse lifetime field
335                let lifetime_re = Regex::new(r"lifetime:\s*([^\n]+)").unwrap();
336                let lifetime_spec = lifetime_re.captures(block_content)
337                    .and_then(|c| c.get(1))
338                    .map(|m| m.as_str().trim().to_string());
339                
340                // Parse where field
341                let where_re = Regex::new(r"where:\s*([^\n]+)").unwrap();
342                let where_clause = where_re.captures(block_content)
343                    .and_then(|c| c.get(1))
344                    .map(|m| m.as_str().trim().to_string());
345                
346                let (param_lifetimes, return_lifetime, mut constraints) = 
347                    if let Some(ref spec) = lifetime_spec {
348                        self.parse_lifetime_specification(spec)
349                    } else {
350                        (Vec::new(), None, Vec::new())
351                    };
352                
353                if let Some(where_str) = where_clause {
354                    constraints.push(where_str);
355                }
356                
357                self.functions.insert(func_name.clone(), ExternalFunctionAnnotation {
358                    name: func_name,
359                    safety,
360                    lifetime_spec,
361                    param_lifetimes,
362                    return_lifetime,
363                    lifetime_constraints: constraints,
364                });
365            }
366        }
367        
368        Ok(())
369    }
370    
371    fn parse_lifetime_specification(&self, spec: &str) -> (Vec<String>, Option<String>, Vec<String>) {
372        let mut param_lifetimes = Vec::new();
373        let mut return_lifetime = None;
374        let mut constraints = Vec::new();
375        
376        // Split by "where" clause if present
377        let parts: Vec<&str> = spec.split("where").collect();
378        let main_spec = parts[0].trim();
379        
380        if parts.len() > 1 {
381            constraints.push(parts[1].trim().to_string());
382        }
383        
384        // Parse main specification (params) -> return
385        if let Some(arrow_pos) = main_spec.find("->") {
386            let params_part = main_spec[..arrow_pos].trim();
387            let return_part = main_spec[arrow_pos + 2..].trim();
388            
389            // Parse parameters
390            if params_part.starts_with('(') && params_part.ends_with(')') {
391                let params_inner = &params_part[1..params_part.len()-1];
392                for param in params_inner.split(',') {
393                    param_lifetimes.push(param.trim().to_string());
394                }
395            }
396            
397            // Parse return type
398            return_lifetime = Some(return_part.to_string());
399        } else {
400            // No parameters, just return type
401            return_lifetime = Some(main_spec.to_string());
402        }
403        
404        (param_lifetimes, return_lifetime, constraints)
405    }
406    
407    fn parse_unsafe_scopes(&mut self, content: &str) -> Result<(), String> {
408        // Parse @external_unsafe: namespace::* or @external_unsafe: class::*
409        let unsafe_scope_re = Regex::new(r"@external_unsafe:\s*([^\s]+)").unwrap();
410        
411        for cap in unsafe_scope_re.captures_iter(content) {
412            if let Some(scope) = cap.get(1) {
413                self.unsafe_scopes.push(scope.as_str().to_string());
414            }
415        }
416        
417        // Also parse block syntax: @external_unsafe: { scopes: [...] }
418        let unsafe_block_re = Regex::new(r"@external_unsafe:\s*\{[^}]*scopes:\s*\[([^\]]+)\]").unwrap();
419        
420        if let Some(cap) = unsafe_block_re.captures(content) {
421            if let Some(scopes) = cap.get(1) {
422                for scope in scopes.as_str().split(',') {
423                    let scope = scope.trim().trim_matches('"').to_string();
424                    if !scope.is_empty() {
425                        self.unsafe_scopes.push(scope);
426                    }
427                }
428            }
429        }
430        
431        Ok(())
432    }
433    
434    fn parse_whitelist(&mut self, content: &str) -> Result<(), String> {
435        let whitelist_re = Regex::new(r"@external_whitelist:\s*\{[^}]*patterns:\s*\[([^\]]+)\]").unwrap();
436        
437        if let Some(cap) = whitelist_re.captures(content) {
438            if let Some(patterns) = cap.get(1) {
439                for pattern in patterns.as_str().split(',') {
440                    let pattern = pattern.trim().trim_matches('"').to_string();
441                    if !pattern.is_empty() {
442                        self.whitelist_patterns.push(pattern);
443                    }
444                }
445            }
446        }
447        
448        Ok(())
449    }
450    
451    fn parse_blacklist(&mut self, content: &str) -> Result<(), String> {
452        let blacklist_re = Regex::new(r"@external_blacklist:\s*\{[^}]*patterns:\s*\[([^\]]+)\]").unwrap();
453        
454        if let Some(cap) = blacklist_re.captures(content) {
455            if let Some(patterns) = cap.get(1) {
456                for pattern in patterns.as_str().split(',') {
457                    let pattern = pattern.trim().trim_matches('"').to_string();
458                    if !pattern.is_empty() {
459                        self.blacklist_patterns.push(pattern);
460                    }
461                }
462            }
463        }
464        
465        Ok(())
466    }
467    
468    fn parse_profiles(&mut self, content: &str) -> Result<(), String> {
469        let profile_re = Regex::new(r"@external_profile:\s*(\w+)\s*\{([^}]+)\}").unwrap();
470        
471        for cap in profile_re.captures_iter(content) {
472            if let (Some(name), Some(block)) = (cap.get(1), cap.get(2)) {
473                let mut profile = ExternalProfile {
474                    name: name.as_str().to_string(),
475                    safe_patterns: Vec::new(),
476                    unsafe_patterns: Vec::new(),
477                };
478                
479                // Parse safe and unsafe patterns in the profile
480                let safe_re = Regex::new(r"safe:\s*\[([^\]]+)\]").unwrap();
481                let unsafe_re = Regex::new(r"unsafe:\s*\[([^\]]+)\]").unwrap();
482                
483                if let Some(safe_cap) = safe_re.captures(block.as_str()) {
484                    if let Some(patterns) = safe_cap.get(1) {
485                        for pattern in patterns.as_str().split(',') {
486                            let pattern = pattern.trim().trim_matches('"').to_string();
487                            if !pattern.is_empty() {
488                                profile.safe_patterns.push(pattern);
489                            }
490                        }
491                    }
492                }
493                
494                if let Some(unsafe_cap) = unsafe_re.captures(block.as_str()) {
495                    if let Some(patterns) = unsafe_cap.get(1) {
496                        for pattern in patterns.as_str().split(',') {
497                            let pattern = pattern.trim().trim_matches('"').to_string();
498                            if !pattern.is_empty() {
499                                profile.unsafe_patterns.push(pattern);
500                            }
501                        }
502                    }
503                }
504                
505                self.profiles.insert(profile.name.clone(), profile);
506            }
507        }
508        
509        Ok(())
510    }
511    
512    fn load_defaults(&mut self) {
513        // Load common C standard library functions (unsafe)
514        self.add_c_stdlib_defaults();
515
516        // Load default unsafe types - STL containers whose internal structure should not be analyzed
517        // These types have internal classes with mutable fields that would trigger false positives
518        self.add_stl_unsafe_types();
519
520        // Blacklisted patterns - always unsafe
521        self.blacklist_patterns.extend(vec![
522            "*::operator new*".to_string(),
523            "*::operator delete*".to_string(),
524            "*::malloc".to_string(),
525            "*::free".to_string(),
526            "*::memcpy".to_string(),
527            "*::memmove".to_string(),
528        ]);
529    }
530
531    fn add_stl_unsafe_types(&mut self) {
532        // STL containers and their internal classes have mutable fields (e.g., _ReuseOrAllocNode)
533        // that would trigger false positives when analyzing @safe classes that use them.
534        // Mark these as unsafe_type so their internal structure is not analyzed.
535        self.unsafe_types.extend(vec![
536            // Hash containers and their internals
537            "std::unordered_map*".to_string(),
538            "std::unordered_set*".to_string(),
539            "std::unordered_multimap*".to_string(),
540            "std::unordered_multiset*".to_string(),
541            "_Hashtable*".to_string(),
542            "_Hash_node*".to_string(),
543            "_ReuseOrAllocNode*".to_string(),
544
545            // Other STL containers with complex internals
546            "std::map*".to_string(),
547            "std::set*".to_string(),
548            "std::multimap*".to_string(),
549            "std::multiset*".to_string(),
550            "std::list*".to_string(),
551            "std::forward_list*".to_string(),
552            "std::deque*".to_string(),
553
554            // Smart pointers
555            "std::shared_ptr*".to_string(),
556            "std::weak_ptr*".to_string(),
557            "std::unique_ptr*".to_string(),
558
559            // Function wrappers
560            "std::function*".to_string(),
561            "std::move_only_function*".to_string(),
562
563            // Other STL internals that may have mutable fields
564            "_Rb_tree*".to_string(),
565            "_List_node*".to_string(),
566            "__shared_ptr*".to_string(),
567            "__weak_ptr*".to_string(),
568        ]);
569    }
570    
571    fn add_c_stdlib_defaults(&mut self) {
572        // All C standard library functions are marked [unsafe] because:
573        // - They are external code not verified by RustyCpp
574        // - Programmer takes responsibility for auditing their usage
575        // - This is the correct semantic: unsafe = programmer-audited, safe = tool-verified
576
577        // Common C I/O functions
578        for func in &["printf", "fprintf", "snprintf", "puts", "fputs", "fgets",
579                      "strcmp", "strncmp", "strlen", "atoi", "atof", "exit"] {
580            self.functions.insert(func.to_string(), ExternalFunctionAnnotation {
581                name: func.to_string(),
582                safety: ExternalSafety::Unsafe,  // All external functions are unsafe
583                lifetime_spec: None,
584                param_lifetimes: Vec::new(),
585                return_lifetime: None,
586                lifetime_constraints: Vec::new(),
587            });
588        }
589
590        // Memory management with lifetimes
591        self.functions.insert("malloc".to_string(), ExternalFunctionAnnotation {
592            name: "malloc".to_string(),
593            safety: ExternalSafety::Unsafe,
594            lifetime_spec: Some("(size_t) -> owned void*".to_string()),
595            param_lifetimes: vec!["size_t".to_string()],
596            return_lifetime: Some("owned void*".to_string()),
597            lifetime_constraints: Vec::new(),
598        });
599
600        self.functions.insert("free".to_string(), ExternalFunctionAnnotation {
601            name: "free".to_string(),
602            safety: ExternalSafety::Unsafe,
603            lifetime_spec: Some("(void*) -> void".to_string()),
604            param_lifetimes: vec!["void*".to_string()],
605            return_lifetime: Some("void".to_string()),
606            lifetime_constraints: Vec::new(),
607        });
608
609        self.functions.insert("strcpy".to_string(), ExternalFunctionAnnotation {
610            name: "strcpy".to_string(),
611            safety: ExternalSafety::Unsafe,
612            lifetime_spec: Some("(char* dest, const char* src) -> char* where dest: 'a, return: 'a".to_string()),
613            param_lifetimes: vec!["char* dest".to_string(), "const char* src".to_string()],
614            return_lifetime: Some("char*".to_string()),
615            lifetime_constraints: vec!["dest: 'a, return: 'a".to_string()],
616        });
617
618        // Other memory/string functions
619        for func in &["calloc", "realloc", "memcpy", "memmove",
620                      "memset", "strcat", "sprintf", "gets"] {
621            self.functions.insert(func.to_string(), ExternalFunctionAnnotation {
622                name: func.to_string(),
623                safety: ExternalSafety::Unsafe,
624                lifetime_spec: None,
625                param_lifetimes: Vec::new(),
626                return_lifetime: None,
627                lifetime_constraints: Vec::new(),
628            });
629        }
630    }
631    
632    /// Check if a type is marked as unsafe_type (internal structure should not be analyzed)
633    pub fn is_type_unsafe(&self, type_name: &str) -> bool {
634        for pattern in &self.unsafe_types {
635            if Self::matches_pattern(type_name, pattern) {
636                return true;
637            }
638        }
639        false
640    }
641
642    pub fn is_function_safe(&self, func_name: &str) -> Option<bool> {
643        // First check if function is in an unsafe scope
644        for scope in &self.unsafe_scopes {
645            if Self::matches_pattern(func_name, scope) {
646                return Some(false);  // Entire scope is unsafe
647            }
648        }
649
650        // Then check explicit function annotations
651        // Try exact match first
652        if let Some(annotation) = self.functions.get(func_name) {
653            return Some(annotation.safety == ExternalSafety::Safe);
654        }
655
656        // Try to match against stored qualified names
657        // e.g., if func_name is "swap", check if any "xxx::swap" exists
658        for (annotated_name, annotation) in &self.functions {
659            // Check if annotated_name ends with "::func_name"
660            if annotated_name.ends_with(&format!("::{}", func_name)) {
661                return Some(annotation.safety == ExternalSafety::Safe);
662            }
663            // Also check if func_name is qualified and annotated_name is just the suffix
664            if func_name.ends_with(&format!("::{}", annotated_name)) {
665                return Some(annotation.safety == ExternalSafety::Safe);
666            }
667        }
668        
669        // Then check active profile
670        if let Some(profile_name) = &self.active_profile {
671            if let Some(profile) = self.profiles.get(profile_name) {
672                #[cfg(test)]
673                {
674                    println!("Checking {} against profile {} with safe patterns: {:?}", 
675                        func_name, profile_name, profile.safe_patterns);
676                }
677                // Check unsafe patterns first (they have higher priority)
678                if Self::matches_any_pattern(func_name, &profile.unsafe_patterns) {
679                    return Some(false);
680                }
681                if Self::matches_any_pattern(func_name, &profile.safe_patterns) {
682                    return Some(true);
683                }
684            }
685        }
686        
687        // Check blacklist (higher priority)
688        if Self::matches_any_pattern(func_name, &self.blacklist_patterns) {
689            return Some(false);
690        }
691        
692        // Check whitelist
693        if Self::matches_any_pattern(func_name, &self.whitelist_patterns) {
694            return Some(true);
695        }
696        
697        // No annotation found
698        None
699    }
700    
701    fn matches_any_pattern(name: &str, patterns: &[String]) -> bool {
702        for pattern in patterns {
703            #[cfg(test)]
704            {
705                println!("Checking {} against pattern: '{}'", name, pattern);
706            }
707            if Self::matches_pattern(name, pattern) {
708                return true;
709            }
710        }
711        false
712    }
713    
/// Reports whether `name` is selected by the glob-style `pattern`.
///
/// Pattern syntax:
/// - `*` matches any run of characters (including none, and including `::`).
/// - `?` matches exactly one character.
/// - every other character matches itself; nothing else is special, so
///   names such as `operator|` or `operator()` can be written verbatim.
///
/// Two conveniences apply on top of plain glob matching:
/// - `*::foo` (with no further `*`) also accepts the bare name `foo`.
/// - a pattern without wildcards and without `::` also accepts a qualified
///   name whose last component equals it, e.g. `make_unique` accepts
///   `std::make_unique`.
///
/// The match is always anchored: the whole of `name` must be consumed.
fn matches_pattern(name: &str, pattern: &str) -> bool {
    // Anchored glob match of `glob` against all of `text`.
    // Works on `char`s so that `?` consumes one character, not one byte.
    fn glob_match(text: &[char], glob: &[char]) -> bool {
        let (mut t, mut g) = (0usize, 0usize);
        // For the most recent `*`: (glob index just past it, text index it
        // currently extends to). Used to backtrack by letting the `*` absorb
        // one more character.
        let mut resume: Option<(usize, usize)> = None;

        while t < text.len() {
            match glob.get(g).copied() {
                Some('*') => {
                    g += 1;
                    resume = Some((g, t));
                }
                Some(c) if c == '?' || c == text[t] => {
                    t += 1;
                    g += 1;
                }
                _ => match resume {
                    Some((after_star, absorbed_to)) => {
                        t = absorbed_to + 1;
                        g = after_star;
                        resume = Some((after_star, t));
                    }
                    None => return false,
                },
            }
        }

        // Text is exhausted; only trailing `*`s may remain in the glob.
        glob[g..].iter().all(|&c| c == '*')
    }

    if pattern.contains('*') || pattern.contains('?') {
        let name_chars: Vec<char> = name.chars().collect();
        let glob_chars: Vec<char> = pattern.chars().collect();

        if glob_match(&name_chars, &glob_chars) {
            return true;
        }

        // "*::foo" means "foo in any scope", which includes no scope at all,
        // so retry against the part after the "*::" prefix.
        return match pattern.strip_prefix("*::") {
            // "*::" is three single-byte characters, so the char slice can
            // be offset by the same amount.
            Some(rest) if !rest.contains('*') => glob_match(&name_chars, &glob_chars[3..]),
            _ => false,
        };
    }

    // Literal pattern: exact match, or an unqualified pattern naming the
    // last component of a qualified name.
    name == pattern
        || (!pattern.contains("::")
            && name
                .strip_suffix(pattern)
                .map_or(false, |qualifier| qualifier.ends_with("::")))
}
783    
784    pub fn set_active_profile(&mut self, profile_name: &str) -> Result<(), String> {
785        if self.profiles.contains_key(profile_name) {
786            self.active_profile = Some(profile_name.to_string());
787            Ok(())
788        } else {
789            Err(format!("Profile '{}' not found", profile_name))
790        }
791    }
792}
793
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an annotation set from `content`, panicking if parsing fails.
    fn annotations_from(content: &str) -> ExternalAnnotations {
        let mut annotations = ExternalAnnotations::new();
        annotations.parse_content(content).unwrap();
        annotations
    }

    #[test]
    fn test_qualified_name_matching() {
        // Unqualified lookups must resolve against qualified annotations.
        let content = r#"
        // @external: {
        //   std::swap: [safe, (T& a, T& b) -> void]
        //   my_namespace::helper: [unsafe, () -> void]
        // }
        "#;
        let annotations = annotations_from(content);

        // Unqualified name matches the qualified annotation.
        assert_eq!(annotations.is_function_safe("swap"), Some(true));
        assert_eq!(annotations.is_function_safe("helper"), Some(false));
        // The fully qualified name keeps working.
        assert_eq!(annotations.is_function_safe("std::swap"), Some(true));
    }

    #[test]
    fn test_safe_vs_unsafe_annotation() {
        let content = r#"
        // @external: {
        //   std::string::length: [safe, (&self) -> size_t]
        //   std::string::c_str: [unsafe, (&self) -> const char*]
        // }
        "#;
        let annotations = annotations_from(content);

        // [safe] entries report Some(true).
        assert_eq!(annotations.is_function_safe("std::string::length"), Some(true));
        assert_eq!(annotations.is_function_safe("length"), Some(true));

        // [unsafe] entries report Some(false).
        assert_eq!(annotations.is_function_safe("std::string::c_str"), Some(false));
        assert_eq!(annotations.is_function_safe("c_str"), Some(false));
    }

    #[test]
    fn test_parse_safety_block() {
        let content = r#"
        // @external: {
        //   malloc: [unsafe]
        //   printf: [unsafe]
        //   custom_func: [unsafe]
        // }
        "#;
        let annotations = annotations_from(content);

        // Every entry above is declared [unsafe], so none may be reported safe.
        for function in ["malloc", "printf", "custom_func"] {
            assert_eq!(
                annotations.is_function_safe(function),
                Some(false),
                "'{}' should be unsafe",
                function
            );
        }
    }

    #[test]
    fn test_pattern_matching() {
        let mut annotations = ExternalAnnotations::new();
        annotations.whitelist_patterns.push("std::*".to_string());
        annotations.blacklist_patterns.push("*::malloc".to_string());

        assert_eq!(annotations.is_function_safe("std::vector::push_back"), Some(true));
        assert_eq!(annotations.is_function_safe("custom::malloc"), Some(false));
        assert_eq!(annotations.is_function_safe("unknown_func"), None);
    }

    #[test]
    fn test_profiles() {
        let content = r#"
        // @external_profile: qt {
        //   safe: ["Q*::*", "qt::*"]
        //   unsafe: ["*::connect"]
        // }
        "#;
        let mut annotations = annotations_from(content);
        annotations.set_active_profile("qt").unwrap();

        // Matches the profile's safe pattern "Q*::*".
        assert_eq!(annotations.is_function_safe("QWidget::show"), Some(true));
        // Also matches "Q*::*", but the unsafe pattern "*::connect" must win.
        assert_eq!(annotations.is_function_safe("QObject::connect"), Some(false));
    }

    #[test]
    fn test_set_active_profile_unknown() {
        let mut annotations = ExternalAnnotations::new();

        assert_eq!(
            annotations.set_active_profile("no_such_profile"),
            Err("Profile 'no_such_profile' not found".to_string())
        );
    }

    #[test]
    fn test_wildcard_patterns() {
        assert!(ExternalAnnotations::matches_pattern("std::vector::size", "*::size"));
        assert!(ExternalAnnotations::matches_pattern("malloc", "malloc"));
        assert!(ExternalAnnotations::matches_pattern("my_malloc", "*malloc"));
        assert!(ExternalAnnotations::matches_pattern("malloc_wrapper", "malloc*"));
        assert!(!ExternalAnnotations::matches_pattern("free", "malloc"));

        // `?` stands for exactly one character.
        assert!(ExternalAnnotations::matches_pattern("malloc", "mallo?"));
        assert!(!ExternalAnnotations::matches_pattern("mallocs", "mallo?"));
    }

    #[test]
    fn test_qt_pattern() {
        // Regression check: a pattern with two wildcards around "::".
        let pattern = "Q*::*";
        let name = "QWidget::show";
        assert!(ExternalAnnotations::matches_pattern(name, pattern),
            "Pattern '{}' should match '{}'", pattern, name);
    }

    #[test]
    fn test_unsafe_type_annotation() {
        let content = r#"
        // @external: {
        //   std::unordered_map: [unsafe_type]
        //   MyCustomContainer: [unsafe_type]
        // }
        "#;
        let annotations = annotations_from(content);

        // Explicitly annotated types are unsafe.
        assert!(annotations.is_type_unsafe("std::unordered_map"));
        assert!(annotations.is_type_unsafe("MyCustomContainer"));
        // A type that is neither annotated nor matched by a default pattern is not.
        assert!(!annotations.is_type_unsafe("MyOtherClass"));
    }

    #[test]
    fn test_default_stl_unsafe_types() {
        let annotations = ExternalAnnotations::new();

        // These are expected to be unsafe without any explicit annotation.
        assert!(annotations.is_type_unsafe("std::unordered_map<int, int>"));
        assert!(annotations.is_type_unsafe("std::unordered_set<std::string>"));
        assert!(annotations.is_type_unsafe("_ReuseOrAllocNode"));
        assert!(annotations.is_type_unsafe("std::function<void()>"));

        // Ordinary user classes must not be caught by the defaults.
        assert!(!annotations.is_type_unsafe("MyClass"));
        assert!(!annotations.is_type_unsafe("UserDefinedMap"));
    }

    #[test]
    fn test_split_entries_multiline() {
        // Entries on separate lines are split apart.
        let annotations = ExternalAnnotations::new();
        let block = r#"
            rusty::Option::is_none: [unsafe, (&self) -> bool]
            rusty::Option::is_some: [unsafe, (&self) -> bool]
        "#;

        let entries = annotations.split_entries(block);
        assert_eq!(entries.len(), 2);
        assert!(entries[0].contains("rusty::Option::is_none"));
        assert!(entries[1].contains("rusty::Option::is_some"));
    }

    #[test]
    fn test_split_entries_comma_separated() {
        // Comma-separated entries on one line are split apart.
        let annotations = ExternalAnnotations::new();
        let block = r#"foo: [unsafe, () -> void], bar: [unsafe, () -> int]"#;

        let entries = annotations.split_entries(block);
        assert_eq!(entries.len(), 2);
        assert!(entries[0].contains("foo"));
        assert!(entries[1].contains("bar"));
    }

    #[test]
    fn test_split_entries_preserves_brackets() {
        // Commas inside brackets must not split an entry.
        let annotations = ExternalAnnotations::new();
        let block = r#"func: [unsafe, (int, float) -> void]"#;

        let entries = annotations.split_entries(block);
        assert_eq!(entries.len(), 1);
        assert!(entries[0].contains("(int, float)"));
    }

    #[test]
    fn test_qualified_function_name_parsing() {
        // Names containing `::` must be kept whole, not split at the first `:`.
        let content = r#"
        // @external: {
        //   rusty::Option::is_none: [unsafe, (&self) -> bool]
        //   std::vector::push_back: [unsafe, (&mut self, T) -> void]
        // }
        "#;
        let annotations = annotations_from(content);

        // The fully qualified names are the stored keys.
        assert!(annotations.functions.contains_key("rusty::Option::is_none"));
        assert!(annotations.functions.contains_key("std::vector::push_back"));
    }
}