Skip to main content

rust_code_analysis_code_split/
preproc.rs

1use std::collections::{HashMap, HashSet, hash_map};
2use std::path::{Path, PathBuf};
3
4use petgraph::{
5    Direction, algo::kosaraju_scc, graph::NodeIndex, stable_graph::StableGraph, visit::Dfs,
6};
7use serde::{Deserialize, Serialize};
8
9use crate::c_langs_macros::is_specials;
10
11use crate::langs::*;
12use crate::languages::language_preproc::*;
13use crate::tools::*;
14use crate::traits::*;
15
16/// Preprocessor data of a `C/C++` file.
17#[derive(Debug, Default, Deserialize, Serialize)]
18pub struct PreprocFile {
19    /// The set of include directives explicitly written in a file
20    pub direct_includes: HashSet<String>,
21    /// The set of include directives implicitly imported in a file
22    /// from other files
23    pub indirect_includes: HashSet<String>,
24    /// The set of macros of a file
25    pub macros: HashSet<String>,
26}
27
28/// Preprocessor data of a series of `C/C++` files.
29#[derive(Debug, Default, Deserialize, Serialize)]
30pub struct PreprocResults {
31    /// The preprocessor data of each `C/C++` file
32    pub files: HashMap<PathBuf, PreprocFile>,
33}
34
35impl PreprocFile {
36    /// Adds new macros to the set of macro of a file.
37    pub fn new_macros(macros: &[&str]) -> Self {
38        let mut pf = Self::default();
39        for m in macros {
40            pf.macros.insert((*m).to_string());
41        }
42        pf
43    }
44}
45
46/// Returns the macros contained in a `C/C++` file.
47pub fn get_macros<S: ::std::hash::BuildHasher>(
48    file: &Path,
49    files: &HashMap<PathBuf, PreprocFile, S>,
50) -> HashSet<String> {
51    let mut macros = HashSet::new();
52    if let Some(pf) = files.get(file) {
53        for m in pf.macros.iter() {
54            macros.insert(m.to_string());
55        }
56        for f in pf.indirect_includes.iter() {
57            if let Some(pf) = files.get(&PathBuf::from(f)) {
58                for m in pf.macros.iter() {
59                    macros.insert(m.to_string());
60                }
61            }
62        }
63    }
64    macros
65}
66
67/// Constructs a dependency graph of the include directives
68/// in a `C/C++` file.
69///
70/// The dependency graph is built using both preprocessor data and not
71/// extracted from the considered `C/C++` files.
72pub fn fix_includes<S: ::std::hash::BuildHasher>(
73    files: &mut HashMap<PathBuf, PreprocFile, S>,
74    all_files: &HashMap<String, Vec<PathBuf>, S>,
75) {
76    let mut nodes: HashMap<PathBuf, NodeIndex> = HashMap::new();
77    // Since we'll remove strong connected components we need to have a stable graph
78    // in order to use the nodes we've in the nodes HashMap.
79    let mut g = StableGraph::new();
80
81    // First we build a graph of include dependencies
82    for (file, pf) in files.iter() {
83        let node = match nodes.entry(file.clone()) {
84            hash_map::Entry::Occupied(l) => *l.get(),
85            hash_map::Entry::Vacant(p) => *p.insert(g.add_node(file.clone())),
86        };
87        let direct_includes = &pf.direct_includes;
88        for i in direct_includes {
89            let possibilities = guess_file(file, i, all_files);
90            for i in possibilities {
91                if &i != file {
92                    let i = match nodes.entry(i.clone()) {
93                        hash_map::Entry::Occupied(l) => *l.get(),
94                        hash_map::Entry::Vacant(p) => *p.insert(g.add_node(i)),
95                    };
96                    g.add_edge(node, i, 0);
97                } else {
98                    // TODO: add an option to display warning
99                    eprintln!("Warning: possible self inclusion {file:?}");
100                }
101            }
102        }
103    }
104
105    // In order to walk in the graph without issues due to cycles
106    // we replace strong connected components by a unique node
107    // All the paths in a scc finally represents a kind of unique file containing
108    // all the files in the scc.
109    let mut scc = kosaraju_scc(&g);
110    let mut scc_map: HashMap<NodeIndex, HashSet<String>> = HashMap::new();
111    for component in scc.iter_mut() {
112        if component.len() > 1 {
113            // For Firefox, there are only few scc and all of them are pretty small
114            // So no need to take a hammer here (for 'contains' stuff).
115            // TODO: in some case a hammer can be useful: check perf Vec vs HashSet
116            let mut incoming = Vec::new();
117            let mut outgoing = Vec::new();
118            let mut paths = HashSet::new();
119
120            for c in component.iter() {
121                for i in g.neighbors_directed(*c, Direction::Incoming) {
122                    if !component.contains(&i) && !incoming.contains(&i) {
123                        incoming.push(i);
124                    }
125                }
126                for o in g.neighbors_directed(*c, Direction::Outgoing) {
127                    if !component.contains(&o) && !outgoing.contains(&o) {
128                        outgoing.push(o);
129                    }
130                }
131            }
132
133            let replacement = g.add_node(PathBuf::from(""));
134            for i in incoming.drain(..) {
135                g.add_edge(i, replacement, 0);
136            }
137            for o in outgoing.drain(..) {
138                g.add_edge(replacement, o, 0);
139            }
140            for c in component.drain(..) {
141                let path = g.remove_node(c).unwrap();
142                paths.insert(path.to_str().unwrap().to_string());
143                *nodes.get_mut(&path).unwrap() = replacement;
144            }
145
146            eprintln!("Warning: possible include cycle:");
147            for p in paths.iter() {
148                eprintln!("  - {p:?}");
149            }
150            eprintln!();
151
152            scc_map.insert(replacement, paths);
153        }
154    }
155
156    for (path, node) in nodes {
157        let mut dfs = Dfs::new(&g, node);
158        if let Some(pf) = files.get_mut(&path) {
159            let x_inc = &mut pf.indirect_includes;
160            while let Some(node) = dfs.next(&g) {
161                let w = g.node_weight(node).unwrap();
162                if w == &PathBuf::from("") {
163                    let paths = scc_map.get(&node);
164                    if let Some(paths) = paths {
165                        for p in paths {
166                            x_inc.insert(p.to_string());
167                        }
168                    } else {
169                        eprintln!("DEBUG: {path:?} {node:?}");
170                    }
171                } else {
172                    x_inc.insert(w.to_str().unwrap().to_string());
173                }
174            }
175        } else {
176            eprintln!(
177                "Warning: included file which has not been preprocessed: {:?}",
178                path
179            );
180        }
181    }
182}
183
184/// Extracts preprocessor data from a `C/C++` file
185/// and inserts these data in a [`PreprocResults`] object.
186///
187///
188/// [`PreprocResults`]: struct.PreprocResults.html
189pub fn preprocess(parser: &PreprocParser, path: &Path, results: &mut PreprocResults) {
190    let node = parser.get_root();
191    let mut cursor = node.cursor();
192    let mut stack = Vec::new();
193    let code = parser.get_code();
194    let mut file_result = PreprocFile::default();
195
196    stack.push(node);
197
198    while let Some(node) = stack.pop() {
199        cursor.reset(&node);
200        if cursor.goto_first_child() {
201            loop {
202                stack.push(cursor.node());
203                if !cursor.goto_next_sibling() {
204                    break;
205                }
206            }
207        }
208
209        let id = Preproc::from(node.kind_id());
210        match id {
211            Preproc::Define | Preproc::Undef => {
212                cursor.reset(&node);
213                cursor.goto_first_child();
214                let identifier = cursor.node();
215
216                if identifier.kind_id() == Preproc::Identifier {
217                    let r#macro = identifier.utf8_text(code).unwrap();
218                    if !is_specials(r#macro) {
219                        file_result.macros.insert(r#macro.to_string());
220                    }
221                }
222            }
223            Preproc::PreprocInclude => {
224                cursor.reset(&node);
225                cursor.goto_first_child();
226                let file = cursor.node();
227
228                if file.kind_id() == Preproc::StringLiteral {
229                    // remove the starting/ending double quote
230                    let file = &code[file.start_byte() + 1..file.end_byte() - 1];
231                    let start = file.iter().position(|&c| c != b' ' && c != b'\t').unwrap();
232                    let end = file.iter().rposition(|&c| c != b' ' && c != b'\t').unwrap();
233                    let file = &file[start..=end];
234                    let file = String::from_utf8(file.to_vec()).unwrap();
235                    file_result.direct_includes.insert(file);
236                }
237            }
238            _ => {}
239        }
240    }
241
242    results.files.insert(path.to_path_buf(), file_result);
243}