rusty_cpp/parser/
header_cache.rs

1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3use std::fs;
4use regex::Regex;
5use clang::{Clang, Index};
6use crate::debug_println;
7
8use super::annotations::{FunctionSignature, extract_annotations};
9use super::safety_annotations::{SafetyMode, parse_entity_safety};
10use super::external_annotations::ExternalAnnotations;
11
12/// Cache for storing function signatures from header files
13#[derive(Debug)]
14pub struct HeaderCache {
15    /// Map from function name to its lifetime signature
16    signatures: HashMap<String, FunctionSignature>,
17    /// Map from function name to its safety annotation from header
18    pub safety_annotations: HashMap<String, SafetyMode>,
19    /// Paths of headers that have been processed
20    processed_headers: Vec<PathBuf>,
21    /// Include paths to search for headers
22    include_paths: Vec<PathBuf>,
23    /// External annotations found in headers
24    pub external_annotations: ExternalAnnotations,
25}
26
/// Strip template arguments from a name (e.g. `"Option<T>"` -> `"Option"`).
///
/// Everything from the first template-opening `<` onwards is dropped, so
/// `"ns::Class<T>::method"` becomes `"ns::Class"`.
///
/// A `<` that is part of an operator name (`operator<`, `operator<<`,
/// `operator<=`, `operator<<=`, `operator<=>`) is not a template bracket and
/// is kept. Without this, all of those operators would collapse to the same
/// `"operator"` key and overwrite each other.
///
/// Names without any `<` are returned unchanged.
fn strip_template_params(name: &str) -> String {
    // Operator spellings that begin with `<`, longest first so the longest match wins.
    const LT_OPERATORS: [&str; 5] = ["<<=", "<=>", "<<", "<=", "<"];

    let mut search_from = 0;
    while let Some(offset) = name[search_from..].find('<') {
        let bracket = search_from + offset;
        let head = &name[..bracket];

        // `operator` must be a whole identifier: `my_operator<T>` is still a template.
        let names_operator = head.strip_suffix("operator").map_or(false, |before| {
            !before
                .chars()
                .next_back()
                .map_or(false, |c| c.is_alphanumeric() || c == '_')
        });
        if !names_operator {
            return head.to_string();
        }

        // Step over the operator symbol and keep looking for a real template bracket.
        let symbol_len = LT_OPERATORS
            .iter()
            .copied()
            .find(|symbol| name[bracket..].starts_with(*symbol))
            .map_or(1, |symbol| symbol.len());
        search_from = bracket + symbol_len;
    }

    name.to_string()
}
35
36impl HeaderCache {
37    pub fn new() -> Self {
38        Self {
39            signatures: HashMap::new(),
40            safety_annotations: HashMap::new(),
41            processed_headers: Vec::new(),
42            include_paths: Vec::new(),
43            external_annotations: ExternalAnnotations::new(),
44        }
45    }
46    
47    /// Set the include paths for header file resolution
48    pub fn set_include_paths(&mut self, paths: Vec<PathBuf>) {
49        self.include_paths = paths;
50    }
51    
52    /// Get a function signature by name
53    pub fn get_signature(&self, func_name: &str) -> Option<&FunctionSignature> {
54        self.signatures.get(func_name)
55    }
56    
57    /// Get safety annotation for a function from headers
58    pub fn get_safety_annotation(&self, func_name: &str) -> Option<SafetyMode> {
59        self.safety_annotations.get(func_name).copied()
60    }
61    
62    /// Parse a header file and extract all annotated function signatures
63    pub fn parse_header(&mut self, header_path: &Path) -> Result<(), String> {
64        debug_println!("DEBUG HEADER: Parsing header file: {}", header_path.display());
65
66        // Skip if already processed
67        if self.processed_headers.iter().any(|p| p == header_path) {
68            debug_println!("DEBUG HEADER: Already processed, skipping");
69            return Ok(());
70        }
71        
72        // Parse safety annotations directly from the header file (before libclang parsing)
73        // This ensures we get regular C++ comments (// and /* */) not just Doxygen comments
74        // Store temporarily - we'll qualify the names after LibClang parsing
75        let mut unqualified_annotations = HashMap::new();
76        if let Ok(header_safety_context) = super::safety_annotations::parse_safety_annotations(header_path) {
77            // Store unqualified annotations temporarily
78            for (func_sig, safety_mode) in &header_safety_context.function_overrides {
79                debug_println!("DEBUG HEADER: Found unqualified annotation for '{}': {:?}", func_sig.name, safety_mode);
80                unqualified_annotations.insert(func_sig.name.clone(), *safety_mode);
81            }
82            debug_println!("DEBUG HEADER: Parsed {} unqualified safety annotations from header file", header_safety_context.function_overrides.len());
83        }
84
85        // Also parse external annotations from the header file
86        if let Ok(content) = fs::read_to_string(header_path) {
87            // Parse external annotations from the file content
88            // These might be in comments or in the file directly
89            if let Err(e) = self.external_annotations.parse_content(&content) {
90                debug_println!("DEBUG HEADER: Failed to parse external annotations: {}", e);
91            } else {
92                debug_println!("DEBUG HEADER: Parsed external annotations from header");
93            }
94        }
95        
96        // Initialize Clang
97        let clang = Clang::new()
98            .map_err(|e| format!("Failed to initialize Clang: {:?}", e))?;
99        let index = Index::new(&clang, false, false);
100        
101        // Build arguments with include paths
102        let mut args = vec![
103            "-std=c++17".to_string(), 
104            "-xc++".to_string(),
105            "-fparse-all-comments".to_string(),  // Essential for getting comments from headers
106        ];
107        for include_path in &self.include_paths {
108            args.push(format!("-I{}", include_path.display()));
109        }
110        
111        // Parse the header file
112        let tu = index
113            .parser(header_path)
114            .arguments(&args.iter().map(|s| s.as_str()).collect::<Vec<_>>())
115            .parse()
116            .map_err(|e| format!("Failed to parse header {}: {:?}", header_path.display(), e))?;
117        
118        // Extract function signatures with annotations
119        let root = tu.get_entity();
120        self.visit_entity_for_signatures(&root);
121
122        // Now qualify the unqualified annotations using the qualified names from LibClang
123        // Build a map from simple method names to their qualified names
124        let mut simple_to_qualified: HashMap<String, Vec<String>> = HashMap::new();
125        for qualified_name in self.safety_annotations.keys() {
126            // Extract the simple name (last component after ::)
127            if let Some(simple_name) = qualified_name.split("::").last() {
128                simple_to_qualified
129                    .entry(simple_name.to_string())
130                    .or_insert_with(Vec::new)
131                    .push(qualified_name.clone());
132            }
133        }
134
135        // Now qualify the unqualified annotations
136        debug_println!("DEBUG HEADER: Qualifying {} unqualified annotations", unqualified_annotations.len());
137        for (simple_name, safety_mode) in &unqualified_annotations {
138            debug_println!("DEBUG HEADER: Processing unqualified '{}': {:?}", simple_name, safety_mode);
139            // Check if this simple name has qualified versions from LibClang
140            if let Some(qualified_names) = simple_to_qualified.get(simple_name) {
141                // This is a method - add annotation for all qualified versions
142                for qualified in qualified_names {
143                    debug_println!("DEBUG HEADER: Qualifying '{}' -> '{}': {:?}",
144                                 simple_name, qualified, safety_mode);
145                    // Update the annotation (LibClang may have found it too, but comment annotation takes precedence)
146                    self.safety_annotations.insert(qualified.clone(), *safety_mode);
147                }
148            } else {
149                // Not a method (no qualified name found), just a plain function
150                // Keep the simple name
151                debug_println!("DEBUG HEADER: Adding plain function annotation for '{}': {:?}",
152                             simple_name, safety_mode);
153                self.safety_annotations.insert(simple_name.clone(), *safety_mode);
154            }
155        }
156
157        debug_println!("DEBUG HEADER: Found {} safety annotations in header (after qualification)", self.safety_annotations.len());
158        for (name, mode) in &self.safety_annotations {
159            debug_println!("DEBUG HEADER:   - {} : {:?}", name, mode);
160        }
161
162        // Mark as processed BEFORE parsing includes to avoid infinite recursion
163        self.processed_headers.push(header_path.to_path_buf());
164
165        // Recursively parse includes from this header
166        if let Ok(content) = fs::read_to_string(header_path) {
167            let (quoted_includes, angle_includes) = extract_includes(&content);
168
169            // Process quoted includes (search relative to header file first)
170            for include_path in quoted_includes {
171                if let Some(resolved) = self.resolve_include(&include_path, header_path, true) {
172                    // Recursively parse the included header
173                    let _ = self.parse_header(&resolved);
174                }
175            }
176
177            // Process angle bracket includes (search include paths only)
178            for include_path in angle_includes {
179                if let Some(resolved) = self.resolve_include(&include_path, header_path, false) {
180                    // Recursively parse the included header
181                    let _ = self.parse_header(&resolved);
182                }
183            }
184        }
185
186        Ok(())
187    }
188    
189    /// Parse headers from a C++ source file's includes
190    pub fn parse_includes_from_source(&mut self, cpp_file: &Path) -> Result<(), String> {
191        let content = fs::read_to_string(cpp_file)
192            .map_err(|e| format!("Failed to read {}: {}", cpp_file.display(), e))?;
193        
194        let (quoted_includes, angle_includes) = extract_includes(&content);
195        
196        // Process quoted includes (search relative to source file first)
197        for include_path in quoted_includes {
198            if let Some(resolved) = self.resolve_include(&include_path, cpp_file, true) {
199                self.parse_header(&resolved)?;
200            }
201        }
202        
203        // Process angle bracket includes (search include paths only)
204        for include_path in angle_includes {
205            if let Some(resolved) = self.resolve_include(&include_path, cpp_file, false) {
206                self.parse_header(&resolved)?;
207            }
208        }
209        
210        Ok(())
211    }
212    
213    /// Resolve an include path using standard C++ include resolution rules
214    fn resolve_include(&self, include_path: &str, source_file: &Path, search_source_dir: bool) -> Option<PathBuf> {
215        // For quoted includes, first try relative to the source file
216        if search_source_dir {
217            if let Some(parent) = source_file.parent() {
218                let local_path = parent.join(include_path);
219                if local_path.exists() {
220                    return Some(local_path);
221                }
222            }
223        }
224        
225        // Search in include paths
226        for include_dir in &self.include_paths {
227            let full_path = include_dir.join(include_path);
228            if full_path.exists() {
229                return Some(full_path);
230            }
231        }
232        
233        // Try as absolute or relative to current directory
234        let path = PathBuf::from(include_path);
235        if path.exists() {
236            return Some(path);
237        }
238        
239        None
240    }
241    
242    fn visit_entity_for_signatures(&mut self, entity: &clang::Entity) {
243        self.visit_entity_with_context(entity, None, None);
244    }
245
246    /// Visit entities tracking both namespace and class-level safety annotations.
247    /// Annotation hierarchy: function > class > namespace
248    fn visit_entity_with_context(
249        &mut self,
250        entity: &clang::Entity,
251        namespace_safety: Option<SafetyMode>,
252        class_safety: Option<SafetyMode>,
253    ) {
254        use clang::EntityKind;
255
256        // Track current context
257        let mut current_namespace_safety = namespace_safety;
258        let mut current_class_safety = class_safety;
259
260        // Check if this is a namespace with safety annotation
261        if entity.get_kind() == EntityKind::Namespace {
262            if let Some(safety) = parse_entity_safety(entity) {
263                current_namespace_safety = Some(safety);
264                if let Some(name) = entity.get_name() {
265                    debug_println!("DEBUG SAFETY: Found namespace '{}' with {:?} annotation", name, safety);
266                }
267            } else {
268                // IMPORTANT: Reset namespace safety when entering a namespace without annotation
269                // This prevents safety from leaking from one namespace to another
270                // (e.g., user's @safe namespace shouldn't apply to std::)
271                current_namespace_safety = None;
272                debug_println!("DEBUG SAFETY: Entering namespace {:?} without annotation - resetting namespace safety",
273                    entity.get_name());
274            }
275        }
276
277        // Check if this is a class/struct with safety annotation
278        if entity.get_kind() == EntityKind::ClassDecl || entity.get_kind() == EntityKind::StructDecl {
279            if let Some(safety) = parse_entity_safety(entity) {
280                current_class_safety = Some(safety);
281                if let Some(name) = entity.get_name() {
282                    debug_println!("DEBUG SAFETY: Found class '{}' with {:?} annotation in header", name, safety);
283                }
284            } else if current_namespace_safety.is_some() {
285                // If class has no explicit annotation, DON'T inherit from namespace
286                // Classes without annotations are undeclared
287                current_class_safety = None;
288            }
289        }
290
291        match entity.get_kind() {
292            EntityKind::FunctionDecl | EntityKind::Method | EntityKind::Constructor | EntityKind::FunctionTemplate => {
293
294                // Extract lifetime annotations
295                if let Some(mut sig) = extract_annotations(entity) {
296                    // Always use qualified name for all functions to avoid namespace collisions
297                    // This ensures functions like ns1::helper and ns2::helper are distinguished
298                    let qualified_name = crate::parser::ast_visitor::get_qualified_name(entity);
299
300                    // Update the signature name to use qualified name
301                    sig.name = qualified_name.clone();
302                    self.signatures.insert(qualified_name, sig);
303                }
304
305                // Extract safety annotations from the entity itself
306                let mut safety = parse_entity_safety(entity);
307
308                // If no explicit safety annotation, inherit from class first, then namespace
309                // Hierarchy: function > class > namespace
310                if safety.is_none() {
311                    if current_class_safety.is_some() {
312                        safety = current_class_safety;
313                        debug_println!("DEBUG SAFETY: Method inheriting {:?} from class", safety);
314                    } else {
315                        safety = current_namespace_safety;
316                        if safety.is_some() {
317                            debug_println!("DEBUG SAFETY: Function inheriting {:?} from namespace", safety);
318                        }
319                    }
320                }
321
322                if let Some(safety_mode) = safety {
323                    // Always use qualified name for all functions to avoid namespace collisions
324                    // This ensures functions like ns1::helper and ns2::helper are distinguished
325                    let raw_name = crate::parser::ast_visitor::get_qualified_name(entity);
326
327                    // For template constructors, the name may include template params like "Option<T>"
328                    // Strip template params so lookups match (call sites use "Option", not "Option<T>")
329                    let name = strip_template_params(&raw_name);
330
331                    self.safety_annotations.insert(name.clone(), safety_mode);
332                    debug_println!("DEBUG SAFETY: Found function '{}' with {:?} annotation in header", name, safety_mode);
333                }
334            }
335            _ => {}
336        }
337
338        // Recursively visit children, passing down context
339        // For class children, pass current_class_safety
340        // For namespace children (not inside a class), pass None for class_safety
341        let child_class_safety = if entity.get_kind() == EntityKind::ClassDecl || entity.get_kind() == EntityKind::StructDecl {
342            current_class_safety
343        } else {
344            class_safety  // Keep parent's class safety for nested entities
345        };
346
347        for child in entity.get_children() {
348            self.visit_entity_with_context(&child, current_namespace_safety, child_class_safety);
349        }
350    }
351    
352    /// Check if any signatures are cached
353    pub fn has_signatures(&self) -> bool {
354        !self.signatures.is_empty()
355    }
356}
357
358/// Extract include paths from C++ source, separating quoted and angle bracket includes
359fn extract_includes(content: &str) -> (Vec<String>, Vec<String>) {
360    let mut quoted_includes = Vec::new();
361    let mut angle_includes = Vec::new();
362    
363    // Match quoted includes: #include "file.h"
364    let quoted_re = Regex::new(r#"#include\s*"([^"]+)""#).unwrap();
365    for cap in quoted_re.captures_iter(content) {
366        if let Some(path) = cap.get(1) {
367            quoted_includes.push(path.as_str().to_string());
368        }
369    }
370    
371    // Match angle bracket includes: #include <file.h>
372    let angle_re = Regex::new(r#"#include\s*<([^>]+)>"#).unwrap();
373    for cap in angle_re.captures_iter(content) {
374        if let Some(path) = cap.get(1) {
375            angle_includes.push(path.as_str().to_string());
376        }
377    }
378    
379    (quoted_includes, angle_includes)
380}
381
#[cfg(test)]
mod tests {
    use super::*;

    /// Check `strip_template_params` against a table of `(input, expected)` pairs.
    fn assert_stripped(cases: &[(&str, &str)]) {
        for (input, expected) in cases {
            assert_eq!(strip_template_params(input), *expected, "input: {:?}", input);
        }
    }

    #[test]
    fn test_extract_includes() {
        let content = r#"
#include "user.h"
#include "data.h"
#include <iostream>
#include <vector>
#include "utils/helper.h"
        "#;

        let (quoted, angle) = extract_includes(content);

        // Quoted and angle-bracket includes are separated, each in source order.
        assert_eq!(quoted, ["user.h", "data.h", "utils/helper.h"]);
        assert_eq!(angle, ["iostream", "vector"]);
    }

    #[test]
    fn test_strip_template_params_simple() {
        // A single level of template arguments.
        assert_stripped(&[
            ("Option<T>", "Option"),
            ("Vector<int>", "Vector"),
            ("Map<K, V>", "Map"),
        ]);
    }

    #[test]
    fn test_strip_template_params_nested() {
        // Nested template arguments are dropped together with the outer ones.
        assert_stripped(&[
            ("Option<Vector<int>>", "Option"),
            ("Map<string, Vector<int>>", "Map"),
        ]);
    }

    #[test]
    fn test_strip_template_params_qualified() {
        // The namespace qualification in front of the template name is kept.
        assert_stripped(&[
            ("rusty::Option<T>", "rusty::Option"),
            ("std::vector<int>", "std::vector"),
            ("ns::inner::Class<T, U>", "ns::inner::Class"),
        ]);
    }

    #[test]
    fn test_strip_template_params_no_template() {
        // Names without template arguments come back unchanged.
        assert_stripped(&[
            ("Option", "Option"),
            ("rusty::Option", "rusty::Option"),
            ("some_function", "some_function"),
        ]);
    }

    #[test]
    fn test_strip_template_params_constructor() {
        // Everything from the first '<' onwards is dropped, so a constructor
        // spelled "Option<T>::Option" becomes "Option" (not "Option::Option").
        // This pins the behaviour of the function itself, not of the
        // qualified-name handling built on top of it.
        assert_stripped(&[("Option<T>::Option", "Option")]);
    }
}