rust_code_analysis_code_split/
preproc.rs1use std::collections::{HashMap, HashSet, hash_map};
2use std::path::{Path, PathBuf};
3
4use petgraph::{
5 Direction, algo::kosaraju_scc, graph::NodeIndex, stable_graph::StableGraph, visit::Dfs,
6};
7use serde::{Deserialize, Serialize};
8
9use crate::c_langs_macros::is_specials;
10
11use crate::langs::*;
12use crate::languages::language_preproc::*;
13use crate::tools::*;
14use crate::traits::*;
15
16#[derive(Debug, Default, Deserialize, Serialize)]
18pub struct PreprocFile {
19 pub direct_includes: HashSet<String>,
21 pub indirect_includes: HashSet<String>,
24 pub macros: HashSet<String>,
26}
27
28#[derive(Debug, Default, Deserialize, Serialize)]
30pub struct PreprocResults {
31 pub files: HashMap<PathBuf, PreprocFile>,
33}
34
35impl PreprocFile {
36 pub fn new_macros(macros: &[&str]) -> Self {
38 let mut pf = Self::default();
39 for m in macros {
40 pf.macros.insert((*m).to_string());
41 }
42 pf
43 }
44}
45
46pub fn get_macros<S: ::std::hash::BuildHasher>(
48 file: &Path,
49 files: &HashMap<PathBuf, PreprocFile, S>,
50) -> HashSet<String> {
51 let mut macros = HashSet::new();
52 if let Some(pf) = files.get(file) {
53 for m in pf.macros.iter() {
54 macros.insert(m.to_string());
55 }
56 for f in pf.indirect_includes.iter() {
57 if let Some(pf) = files.get(&PathBuf::from(f)) {
58 for m in pf.macros.iter() {
59 macros.insert(m.to_string());
60 }
61 }
62 }
63 }
64 macros
65}
66
67pub fn fix_includes<S: ::std::hash::BuildHasher>(
73 files: &mut HashMap<PathBuf, PreprocFile, S>,
74 all_files: &HashMap<String, Vec<PathBuf>, S>,
75) {
76 let mut nodes: HashMap<PathBuf, NodeIndex> = HashMap::new();
77 let mut g = StableGraph::new();
80
81 for (file, pf) in files.iter() {
83 let node = match nodes.entry(file.clone()) {
84 hash_map::Entry::Occupied(l) => *l.get(),
85 hash_map::Entry::Vacant(p) => *p.insert(g.add_node(file.clone())),
86 };
87 let direct_includes = &pf.direct_includes;
88 for i in direct_includes {
89 let possibilities = guess_file(file, i, all_files);
90 for i in possibilities {
91 if &i != file {
92 let i = match nodes.entry(i.clone()) {
93 hash_map::Entry::Occupied(l) => *l.get(),
94 hash_map::Entry::Vacant(p) => *p.insert(g.add_node(i)),
95 };
96 g.add_edge(node, i, 0);
97 } else {
98 eprintln!("Warning: possible self inclusion {file:?}");
100 }
101 }
102 }
103 }
104
105 let mut scc = kosaraju_scc(&g);
110 let mut scc_map: HashMap<NodeIndex, HashSet<String>> = HashMap::new();
111 for component in scc.iter_mut() {
112 if component.len() > 1 {
113 let mut incoming = Vec::new();
117 let mut outgoing = Vec::new();
118 let mut paths = HashSet::new();
119
120 for c in component.iter() {
121 for i in g.neighbors_directed(*c, Direction::Incoming) {
122 if !component.contains(&i) && !incoming.contains(&i) {
123 incoming.push(i);
124 }
125 }
126 for o in g.neighbors_directed(*c, Direction::Outgoing) {
127 if !component.contains(&o) && !outgoing.contains(&o) {
128 outgoing.push(o);
129 }
130 }
131 }
132
133 let replacement = g.add_node(PathBuf::from(""));
134 for i in incoming.drain(..) {
135 g.add_edge(i, replacement, 0);
136 }
137 for o in outgoing.drain(..) {
138 g.add_edge(replacement, o, 0);
139 }
140 for c in component.drain(..) {
141 let path = g.remove_node(c).unwrap();
142 paths.insert(path.to_str().unwrap().to_string());
143 *nodes.get_mut(&path).unwrap() = replacement;
144 }
145
146 eprintln!("Warning: possible include cycle:");
147 for p in paths.iter() {
148 eprintln!(" - {p:?}");
149 }
150 eprintln!();
151
152 scc_map.insert(replacement, paths);
153 }
154 }
155
156 for (path, node) in nodes {
157 let mut dfs = Dfs::new(&g, node);
158 if let Some(pf) = files.get_mut(&path) {
159 let x_inc = &mut pf.indirect_includes;
160 while let Some(node) = dfs.next(&g) {
161 let w = g.node_weight(node).unwrap();
162 if w == &PathBuf::from("") {
163 let paths = scc_map.get(&node);
164 if let Some(paths) = paths {
165 for p in paths {
166 x_inc.insert(p.to_string());
167 }
168 } else {
169 eprintln!("DEBUG: {path:?} {node:?}");
170 }
171 } else {
172 x_inc.insert(w.to_str().unwrap().to_string());
173 }
174 }
175 } else {
176 eprintln!(
177 "Warning: included file which has not been preprocessed: {:?}",
178 path
179 );
180 }
181 }
182}
183
184pub fn preprocess(parser: &PreprocParser, path: &Path, results: &mut PreprocResults) {
190 let node = parser.get_root();
191 let mut cursor = node.cursor();
192 let mut stack = Vec::new();
193 let code = parser.get_code();
194 let mut file_result = PreprocFile::default();
195
196 stack.push(node);
197
198 while let Some(node) = stack.pop() {
199 cursor.reset(&node);
200 if cursor.goto_first_child() {
201 loop {
202 stack.push(cursor.node());
203 if !cursor.goto_next_sibling() {
204 break;
205 }
206 }
207 }
208
209 let id = Preproc::from(node.kind_id());
210 match id {
211 Preproc::Define | Preproc::Undef => {
212 cursor.reset(&node);
213 cursor.goto_first_child();
214 let identifier = cursor.node();
215
216 if identifier.kind_id() == Preproc::Identifier {
217 let r#macro = identifier.utf8_text(code).unwrap();
218 if !is_specials(r#macro) {
219 file_result.macros.insert(r#macro.to_string());
220 }
221 }
222 }
223 Preproc::PreprocInclude => {
224 cursor.reset(&node);
225 cursor.goto_first_child();
226 let file = cursor.node();
227
228 if file.kind_id() == Preproc::StringLiteral {
229 let file = &code[file.start_byte() + 1..file.end_byte() - 1];
231 let start = file.iter().position(|&c| c != b' ' && c != b'\t').unwrap();
232 let end = file.iter().rposition(|&c| c != b' ' && c != b'\t').unwrap();
233 let file = &file[start..=end];
234 let file = String::from_utf8(file.to_vec()).unwrap();
235 file_result.direct_includes.insert(file);
236 }
237 }
238 _ => {}
239 }
240 }
241
242 results.files.insert(path.to_path_buf(), file_result);
243}