big_code_analysis/
preproc.rs1#![allow(
8 clippy::enum_glob_use,
9 clippy::if_not_else,
10 clippy::too_many_lines,
11 clippy::wildcard_imports
12)]
13
14use std::collections::{HashMap, HashSet, hash_map};
15use std::path::{Path, PathBuf};
16
17use petgraph::{
18 Direction, algo::kosaraju_scc, graph::NodeIndex, stable_graph::StableGraph, visit::Dfs,
19};
20use serde::{Deserialize, Serialize};
21
22use crate::c_langs_macros::is_specials;
23
24use crate::langs::*;
25use crate::languages::language_preproc::*;
26use crate::tools::*;
27use crate::traits::*;
28
29#[derive(Debug, Default, Deserialize, Serialize)]
31pub struct PreprocFile {
32 pub direct_includes: HashSet<String>,
34 pub indirect_includes: HashSet<String>,
37 pub macros: HashSet<String>,
39}
40
41#[derive(Debug, Default, Deserialize, Serialize)]
43pub struct PreprocResults {
44 pub files: HashMap<PathBuf, PreprocFile>,
46}
47
48impl PreprocFile {
49 #[must_use]
51 pub fn new_macros(macros: &[&str]) -> Self {
52 let mut pf = Self::default();
53 for m in macros {
54 pf.macros.insert((*m).to_string());
55 }
56 pf
57 }
58}
59
60pub fn get_macros<S: ::std::hash::BuildHasher>(
62 file: &Path,
63 files: &HashMap<PathBuf, PreprocFile, S>,
64) -> HashSet<String> {
65 let mut macros = HashSet::new();
66 if let Some(pf) = files.get(file) {
67 for m in &pf.macros {
68 macros.insert(m.clone());
69 }
70 for f in &pf.indirect_includes {
71 if let Some(pf) = files.get(&PathBuf::from(f)) {
72 for m in &pf.macros {
73 macros.insert(m.clone());
74 }
75 }
76 }
77 }
78 macros
79}
80
81pub fn fix_includes<S: ::std::hash::BuildHasher>(
98 files: &mut HashMap<PathBuf, PreprocFile, S>,
99 all_files: &HashMap<String, Vec<PathBuf>, S>,
100) {
101 let mut nodes: HashMap<PathBuf, NodeIndex> = HashMap::new();
102 let mut g = StableGraph::new();
105
106 for (file, pf) in files.iter() {
108 let node = match nodes.entry(file.clone()) {
109 hash_map::Entry::Occupied(l) => *l.get(),
110 hash_map::Entry::Vacant(p) => *p.insert(g.add_node(file.clone())),
111 };
112 let direct_includes = &pf.direct_includes;
113 for i in direct_includes {
114 let possibilities = guess_file(file, i, all_files);
115 for i in possibilities {
116 if &i != file {
117 let i = match nodes.entry(i.clone()) {
118 hash_map::Entry::Occupied(l) => *l.get(),
119 hash_map::Entry::Vacant(p) => *p.insert(g.add_node(i)),
120 };
121 g.add_edge(node, i, 0);
122 } else {
123 eprintln!("Warning: possible self inclusion {}", file.display());
125 }
126 }
127 }
128 }
129
130 let mut scc = kosaraju_scc(&g);
135 let mut scc_map: HashMap<NodeIndex, HashSet<String>> = HashMap::new();
136 for component in &mut scc {
137 if component.len() > 1 {
138 let mut incoming = Vec::new();
142 let mut outgoing = Vec::new();
143 let mut paths = HashSet::new();
144
145 for c in component.iter() {
146 for i in g.neighbors_directed(*c, Direction::Incoming) {
147 if !component.contains(&i) && !incoming.contains(&i) {
148 incoming.push(i);
149 }
150 }
151 for o in g.neighbors_directed(*c, Direction::Outgoing) {
152 if !component.contains(&o) && !outgoing.contains(&o) {
153 outgoing.push(o);
154 }
155 }
156 }
157
158 let replacement = g.add_node(PathBuf::from(""));
159 for i in incoming.drain(..) {
160 g.add_edge(i, replacement, 0);
161 }
162 for o in outgoing.drain(..) {
163 g.add_edge(replacement, o, 0);
164 }
165 for c in component.drain(..) {
166 let path = g
167 .remove_node(c)
168 .expect("invariant: SCC component node must exist in graph");
169 if let Some(s) = path.to_str() {
170 paths.insert(s.to_string());
171 } else {
172 eprintln!(
173 "warning: skipping non-UTF-8 path in include cycle: {}",
174 path.display()
175 );
176 }
177 *nodes
178 .get_mut(&path)
179 .expect("invariant: every graph node must have a nodes map entry") =
180 replacement;
181 }
182
183 eprintln!("Warning: possible include cycle:");
184 for p in &paths {
185 eprintln!(" - \"{p}\"");
189 }
190 eprintln!();
191
192 scc_map.insert(replacement, paths);
193 }
194 }
195
196 for (path, node) in nodes {
197 let mut dfs = Dfs::new(&g, node);
198 if let Some(pf) = files.get_mut(&path) {
199 let x_inc = &mut pf.indirect_includes;
200 while let Some(node) = dfs.next(&g) {
201 let w = g
202 .node_weight(node)
203 .expect("invariant: DFS-visited node must have weight in graph");
204 if w == &PathBuf::from("") {
205 let paths = scc_map.get(&node);
206 if let Some(paths) = paths {
207 for p in paths {
208 x_inc.insert(p.clone());
209 }
210 } else {
211 unreachable!(
212 "every empty-path node is an SCC replacement and must have a scc_map entry"
213 );
214 }
215 } else {
216 let Some(s) = w.to_str() else {
217 eprintln!(
218 "warning: skipping non-UTF-8 indirect include path: {}",
219 w.display()
220 );
221 continue;
222 };
223 x_inc.insert(s.to_string());
224 }
225 }
226 } else {
227 eprintln!(
228 "Warning: included file which has not been preprocessed: {}",
229 path.display()
230 );
231 }
232 }
233}
234
235pub fn preprocess(parser: &PreprocParser, path: &Path, results: &mut PreprocResults) {
241 let node = parser.get_root();
242 let mut cursor = node.cursor();
243 let mut stack = Vec::new();
244 let code = parser.get_code();
245 let mut file_result = PreprocFile::default();
246
247 stack.push(node);
248
249 while let Some(node) = stack.pop() {
250 cursor.reset(&node);
251 if cursor.goto_first_child() {
252 loop {
253 stack.push(cursor.node());
254 if !cursor.goto_next_sibling() {
255 break;
256 }
257 }
258 }
259
260 let id = Preproc::from(node.kind_id());
261 match id {
262 Preproc::Define | Preproc::Undef => {
263 cursor.reset(&node);
264 cursor.goto_first_child();
265 let identifier = cursor.node();
266
267 if identifier.kind_id() == Preproc::Identifier {
268 let Some(macro_text) = identifier.utf8_text(code) else {
269 continue;
270 };
271 if !is_specials(macro_text) {
272 file_result.macros.insert(macro_text.to_string());
273 }
274 }
275 }
276 Preproc::PreprocInclude => {
277 cursor.reset(&node);
278 cursor.goto_first_child();
279 let file = cursor.node();
280
281 if file.kind_id() == Preproc::StringLiteral {
282 let file = &code[file.start_byte() + 1..file.end_byte() - 1];
284 let Some(start) = file.iter().position(|&c| c != b' ' && c != b'\t') else {
285 continue;
286 };
287 let Some(end) = file.iter().rposition(|&c| c != b' ' && c != b'\t') else {
288 continue;
289 };
290 let file = &file[start..=end];
291 let Ok(file) = std::str::from_utf8(file) else {
292 continue;
293 };
294 file_result.direct_includes.insert(file.to_string());
295 }
296 }
297 _ => {}
298 }
299 }
300
301 results.files.insert(path.to_path_buf(), file_result);
302}
303
304#[cfg(test)]
305#[allow(
306 clippy::float_cmp,
307 clippy::cast_precision_loss,
308 clippy::cast_possible_truncation,
309 clippy::cast_sign_loss,
310 clippy::similar_names,
311 clippy::doc_markdown,
312 clippy::needless_raw_string_hashes,
313 clippy::too_many_lines
314)]
315mod tests {
316 use super::*;
317
318 fn parse(source: &str) -> PreprocParser {
319 PreprocParser::new(source.as_bytes().to_vec(), &PathBuf::from("test.h"), None)
320 }
321
322 #[test]
327 fn preprocess_empty_include_does_not_panic() {
328 let parser = parse("#include \"\"\n");
329 let mut results = PreprocResults::default();
330 preprocess(&parser, &PathBuf::from("test.h"), &mut results);
331 let pf = results
332 .files
333 .get(&PathBuf::from("test.h"))
334 .expect("file entry must be inserted");
335 assert!(pf.direct_includes.is_empty());
336 }
337
338 #[test]
341 fn preprocess_whitespace_only_include_does_not_panic() {
342 let parser = parse("#include \" \"\n");
343 let mut results = PreprocResults::default();
344 preprocess(&parser, &PathBuf::from("test.h"), &mut results);
345 let pf = results
346 .files
347 .get(&PathBuf::from("test.h"))
348 .expect("file entry must be inserted");
349 assert!(pf.direct_includes.is_empty());
350 }
351
352 #[test]
355 fn preprocess_valid_include_is_recorded() {
356 let parser = parse("#include \" foo.h \"\n");
357 let mut results = PreprocResults::default();
358 preprocess(&parser, &PathBuf::from("test.h"), &mut results);
359 let pf = results
360 .files
361 .get(&PathBuf::from("test.h"))
362 .expect("file entry must be inserted");
363 assert!(pf.direct_includes.contains("foo.h"));
364 }
365
366 #[test]
368 fn preprocess_define_records_macro() {
369 let parser = parse("#define FOO 1\n");
370 let mut results = PreprocResults::default();
371 preprocess(&parser, &PathBuf::from("test.h"), &mut results);
372 let pf = results
373 .files
374 .get(&PathBuf::from("test.h"))
375 .expect("file entry must be inserted");
376 assert!(pf.macros.contains("FOO"));
377 }
378
379 #[test]
384 fn fix_includes_handles_simple_cycle() {
385 let mut files: HashMap<PathBuf, PreprocFile> = HashMap::new();
386 let mut a = PreprocFile::default();
387 a.direct_includes.insert("b.h".to_string());
388 let mut b = PreprocFile::default();
389 b.direct_includes.insert("a.h".to_string());
390 files.insert(PathBuf::from("a.h"), a);
391 files.insert(PathBuf::from("b.h"), b);
392
393 let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
394 all_files.insert("a.h".to_string(), vec![PathBuf::from("a.h")]);
395 all_files.insert("b.h".to_string(), vec![PathBuf::from("b.h")]);
396
397 fix_includes(&mut files, &all_files);
398
399 let a = files
402 .get(&PathBuf::from("a.h"))
403 .expect("a.h must be retained");
404 assert!(a.indirect_includes.contains("a.h"));
405 assert!(a.indirect_includes.contains("b.h"));
406
407 let b = files
408 .get(&PathBuf::from("b.h"))
409 .expect("b.h must be retained");
410 assert!(b.indirect_includes.contains("a.h"));
411 assert!(b.indirect_includes.contains("b.h"));
412 }
413}