rust_code_analysis/
preproc.rs

1use std::collections::{hash_map, HashMap, HashSet};
2use std::path::{Path, PathBuf};
3
4use petgraph::{
5    algo::kosaraju_scc, graph::NodeIndex, stable_graph::StableGraph, visit::Dfs, Direction,
6};
7use serde::{Deserialize, Serialize};
8
9use crate::c_langs_macros::is_specials;
10use crate::node::Node;
11
12use crate::langs::*;
13use crate::languages::language_preproc::*;
14use crate::tools::*;
15use crate::traits::*;
16
17/// Preprocessor data of a `C/C++` file.
18#[derive(Debug, Default, Deserialize, Serialize)]
19pub struct PreprocFile {
20    /// The set of include directives explicitly written in a file
21    pub direct_includes: HashSet<String>,
22    /// The set of include directives implicitly imported in a file
23    /// from other files
24    pub indirect_includes: HashSet<String>,
25    /// The set of macros of a file
26    pub macros: HashSet<String>,
27}
28
29/// Preprocessor data of a series of `C/C++` files.
30#[derive(Debug, Default, Deserialize, Serialize)]
31pub struct PreprocResults {
32    /// The preprocessor data of each `C/C++` file
33    pub files: HashMap<PathBuf, PreprocFile>,
34}
35
36impl PreprocFile {
37    /// Adds new macros to the set of macro of a file.
38    pub fn new_macros(macros: &[&str]) -> Self {
39        let mut pf = Self::default();
40        for m in macros {
41            pf.macros.insert((*m).to_string());
42        }
43        pf
44    }
45}
46
47/// Returns the macros contained in a `C/C++` file.
48pub fn get_macros<S: ::std::hash::BuildHasher>(
49    file: &Path,
50    files: &HashMap<PathBuf, PreprocFile, S>,
51) -> HashSet<String> {
52    let mut macros = HashSet::new();
53    if let Some(pf) = files.get(file) {
54        for m in pf.macros.iter() {
55            macros.insert(m.to_string());
56        }
57        for f in pf.indirect_includes.iter() {
58            if let Some(pf) = files.get(&PathBuf::from(f)) {
59                for m in pf.macros.iter() {
60                    macros.insert(m.to_string());
61                }
62            }
63        }
64    }
65    macros
66}
67
68/// Constructs a dependency graph of the include directives
69/// in a `C/C++` file.
70///
71/// The dependency graph is built using both preprocessor data and not
72/// extracted from the considered `C/C++` files.
73pub fn fix_includes<S: ::std::hash::BuildHasher>(
74    files: &mut HashMap<PathBuf, PreprocFile, S>,
75    all_files: &HashMap<String, Vec<PathBuf>, S>,
76) {
77    let mut nodes: HashMap<PathBuf, NodeIndex> = HashMap::new();
78    // Since we'll remove strong connected components we need to have a stable graph
79    // in order to use the nodes we've in the nodes HashMap.
80    let mut g = StableGraph::new();
81
82    // First we build a graph of include dependencies
83    for (file, pf) in files.iter() {
84        let node = match nodes.entry(file.clone()) {
85            hash_map::Entry::Occupied(l) => *l.get(),
86            hash_map::Entry::Vacant(p) => *p.insert(g.add_node(file.clone())),
87        };
88        let direct_includes = &pf.direct_includes;
89        for i in direct_includes {
90            let possibilities = guess_file(file, i, all_files);
91            for i in possibilities {
92                if &i != file {
93                    let i = match nodes.entry(i.clone()) {
94                        hash_map::Entry::Occupied(l) => *l.get(),
95                        hash_map::Entry::Vacant(p) => *p.insert(g.add_node(i)),
96                    };
97                    g.add_edge(node, i, 0);
98                } else {
99                    // TODO: add an option to display warning
100                    eprintln!("Warning: possible self inclusion {:?}", file);
101                }
102            }
103        }
104    }
105
106    // In order to walk in the graph without issues due to cycles
107    // we replace strong connected components by a unique node
108    // All the paths in a scc finally represents a kind of unique file containing
109    // all the files in the scc.
110    let mut scc = kosaraju_scc(&g);
111    let mut scc_map: HashMap<NodeIndex, HashSet<String>> = HashMap::new();
112    for component in scc.iter_mut() {
113        if component.len() > 1 {
114            // For Firefox, there are only few scc and all of them are pretty small
115            // So no need to take a hammer here (for 'contains' stuff).
116            // TODO: in some case a hammer can be useful: check perf Vec vs HashSet
117            let mut incoming = Vec::new();
118            let mut outgoing = Vec::new();
119            let mut paths = HashSet::new();
120
121            for c in component.iter() {
122                for i in g.neighbors_directed(*c, Direction::Incoming) {
123                    if !component.contains(&i) && !incoming.contains(&i) {
124                        incoming.push(i);
125                    }
126                }
127                for o in g.neighbors_directed(*c, Direction::Outgoing) {
128                    if !component.contains(&o) && !outgoing.contains(&o) {
129                        outgoing.push(o);
130                    }
131                }
132            }
133
134            let replacement = g.add_node(PathBuf::from(""));
135            for i in incoming.drain(..) {
136                g.add_edge(i, replacement, 0);
137            }
138            for o in outgoing.drain(..) {
139                g.add_edge(replacement, o, 0);
140            }
141            for c in component.drain(..) {
142                let path = g.remove_node(c).unwrap();
143                paths.insert(path.to_str().unwrap().to_string());
144                *nodes.get_mut(&path).unwrap() = replacement;
145            }
146
147            eprintln!("Warning: possible include cycle:");
148            for p in paths.iter() {
149                eprintln!("  - {:?}", p);
150            }
151            eprintln!();
152
153            scc_map.insert(replacement, paths);
154        }
155    }
156
157    for (path, node) in nodes {
158        let mut dfs = Dfs::new(&g, node);
159        if let Some(pf) = files.get_mut(&path) {
160            let x_inc = &mut pf.indirect_includes;
161            while let Some(node) = dfs.next(&g) {
162                let w = g.node_weight(node).unwrap();
163                if w == &PathBuf::from("") {
164                    let paths = scc_map.get(&node);
165                    if let Some(paths) = paths {
166                        for p in paths {
167                            x_inc.insert(p.to_string());
168                        }
169                    } else {
170                        eprintln!("DEBUG: {:?} {:?}", path, node);
171                    }
172                } else {
173                    x_inc.insert(w.to_str().unwrap().to_string());
174                }
175            }
176        } else {
177            //eprintln!("Warning: included file which has not been preprocessed: {:?}", path);
178        }
179    }
180}
181
182/// Extracts preprocessor data from a `C/C++` file
183/// and inserts these data in a [`PreprocResults`] object.
184///
185///
186/// [`PreprocResults`]: struct.PreprocResults.html
187pub fn preprocess(parser: &PreprocParser, path: &Path, results: &mut PreprocResults) {
188    let node = parser.get_root();
189    let mut cursor = node.object().walk();
190    let mut stack = Vec::new();
191    let code = parser.get_code();
192    let mut file_result = PreprocFile::default();
193
194    //dump_node(code, &node, -1, None, None);
195    //eprintln!("DEBUG {:?}", path);
196    stack.push(node);
197
198    while let Some(node) = stack.pop() {
199        cursor.reset(node.object());
200        if cursor.goto_first_child() {
201            loop {
202                stack.push(Node::new(cursor.node()));
203                if !cursor.goto_next_sibling() {
204                    break;
205                }
206            }
207        }
208
209        let id = Preproc::from(node.object().kind_id());
210        match id {
211            Preproc::Define | Preproc::Undef => {
212                cursor.reset(node.object());
213                cursor.goto_first_child();
214                let identifier = cursor.node();
215
216                if identifier.kind_id() == Preproc::Identifier {
217                    let r#macro = identifier.utf8_text(code).unwrap();
218                    if !is_specials(r#macro) {
219                        file_result.macros.insert(r#macro.to_string());
220                    }
221                }
222            }
223            Preproc::PreprocInclude => {
224                cursor.reset(node.object());
225                cursor.goto_first_child();
226                let file = cursor.node();
227
228                if file.kind_id() == Preproc::StringLiteral {
229                    // remove the starting/ending double quote
230                    let file = &code[file.start_byte() + 1..file.end_byte() - 1];
231                    let start = file.iter().position(|&c| c != b' ' && c != b'\t').unwrap();
232                    let end = file.iter().rposition(|&c| c != b' ' && c != b'\t').unwrap();
233                    let file = &file[start..=end];
234                    let file = String::from_utf8(file.to_vec()).unwrap();
235                    file_result.direct_includes.insert(file);
236                }
237            }
238            _ => {}
239        }
240    }
241
242    results.files.insert(path.to_path_buf(), file_result);
243}