#![allow(
clippy::enum_glob_use,
clippy::if_not_else,
clippy::too_many_lines,
clippy::wildcard_imports
)]
use std::collections::{HashMap, HashSet, hash_map};
use std::path::{Path, PathBuf};
use petgraph::{
Direction, algo::kosaraju_scc, graph::NodeIndex, stable_graph::StableGraph, visit::Dfs,
};
use serde::{Deserialize, Serialize};
use crate::c_langs_macros::is_specials;
use crate::langs::*;
use crate::languages::language_preproc::*;
use crate::tools::*;
use crate::traits::*;
/// Preprocessor information gathered for a single source file.
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct PreprocFile {
/// Include targets written directly in this file. `preprocess` records the
/// text of quoted `#include` literals with the quotes and any surrounding
/// spaces/tabs removed.
pub direct_includes: HashSet<String>,
/// Paths of the files reachable through the include graph; filled in by
/// `fix_includes`.
pub indirect_includes: HashSet<String>,
/// Macro names recorded for this file. `preprocess` adds the identifiers of
/// `Define`/`Undef` nodes unless `is_specials` accepts them; `new_macros`
/// seeds the set from a slice of names.
pub macros: HashSet<String>,
}
/// Preprocessor information for a set of files.
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct PreprocResults {
/// Per-file data, keyed by the path that was handed to `preprocess`.
pub files: HashMap<PathBuf, PreprocFile>,
}
impl PreprocFile {
    /// Creates a `PreprocFile` whose macro set holds every name in `macros`.
    ///
    /// Duplicate names collapse into a single entry; both include sets start
    /// out empty.
    #[must_use]
    pub fn new_macros(macros: &[&str]) -> Self {
        Self {
            macros: macros.iter().map(|name| (*name).to_string()).collect(),
            ..Self::default()
        }
    }
}
/// Collects the macros visible from `file`.
///
/// The result is the union of the macros recorded for `file` itself and the
/// macros of every entry in its `indirect_includes` that is also a key of
/// `files`. Indirect includes without an entry in `files` are ignored, and an
/// unknown `file` yields an empty set.
pub fn get_macros<S: ::std::hash::BuildHasher>(
    file: &Path,
    files: &HashMap<PathBuf, PreprocFile, S>,
) -> HashSet<String> {
    let Some(root) = files.get(file) else {
        return HashSet::new();
    };

    // Macros contributed by the indirectly included files that are known.
    let inherited = root
        .indirect_includes
        .iter()
        .filter_map(|include| files.get(Path::new(include)))
        .flat_map(|included| included.macros.iter());

    root.macros.iter().chain(inherited).cloned().collect()
}
pub fn fix_includes<S: ::std::hash::BuildHasher>(
files: &mut HashMap<PathBuf, PreprocFile, S>,
all_files: &HashMap<String, Vec<PathBuf>, S>,
) {
let mut nodes: HashMap<PathBuf, NodeIndex> = HashMap::new();
let mut g = StableGraph::new();
for (file, pf) in files.iter() {
let node = match nodes.entry(file.clone()) {
hash_map::Entry::Occupied(l) => *l.get(),
hash_map::Entry::Vacant(p) => *p.insert(g.add_node(file.clone())),
};
let direct_includes = &pf.direct_includes;
for i in direct_includes {
let possibilities = guess_file(file, i, all_files);
for i in possibilities {
if &i != file {
let i = match nodes.entry(i.clone()) {
hash_map::Entry::Occupied(l) => *l.get(),
hash_map::Entry::Vacant(p) => *p.insert(g.add_node(i)),
};
g.add_edge(node, i, 0);
} else {
eprintln!("Warning: possible self inclusion {}", file.display());
}
}
}
}
let mut scc = kosaraju_scc(&g);
let mut scc_map: HashMap<NodeIndex, HashSet<String>> = HashMap::new();
for component in &mut scc {
if component.len() > 1 {
let mut incoming = Vec::new();
let mut outgoing = Vec::new();
let mut paths = HashSet::new();
for c in component.iter() {
for i in g.neighbors_directed(*c, Direction::Incoming) {
if !component.contains(&i) && !incoming.contains(&i) {
incoming.push(i);
}
}
for o in g.neighbors_directed(*c, Direction::Outgoing) {
if !component.contains(&o) && !outgoing.contains(&o) {
outgoing.push(o);
}
}
}
let replacement = g.add_node(PathBuf::from(""));
for i in incoming.drain(..) {
g.add_edge(i, replacement, 0);
}
for o in outgoing.drain(..) {
g.add_edge(replacement, o, 0);
}
for c in component.drain(..) {
let path = g
.remove_node(c)
.expect("invariant: SCC component node must exist in graph");
if let Some(s) = path.to_str() {
paths.insert(s.to_string());
} else {
eprintln!(
"warning: skipping non-UTF-8 path in include cycle: {}",
path.display()
);
}
*nodes
.get_mut(&path)
.expect("invariant: every graph node must have a nodes map entry") =
replacement;
}
eprintln!("Warning: possible include cycle:");
for p in &paths {
eprintln!(" - \"{p}\"");
}
eprintln!();
scc_map.insert(replacement, paths);
}
}
for (path, node) in nodes {
let mut dfs = Dfs::new(&g, node);
if let Some(pf) = files.get_mut(&path) {
let x_inc = &mut pf.indirect_includes;
while let Some(node) = dfs.next(&g) {
let w = g
.node_weight(node)
.expect("invariant: DFS-visited node must have weight in graph");
if w == &PathBuf::from("") {
let paths = scc_map.get(&node);
if let Some(paths) = paths {
for p in paths {
x_inc.insert(p.clone());
}
} else {
unreachable!(
"every empty-path node is an SCC replacement and must have a scc_map entry"
);
}
} else {
let Some(s) = w.to_str() else {
eprintln!(
"warning: skipping non-UTF-8 indirect include path: {}",
w.display()
);
continue;
};
x_inc.insert(s.to_string());
}
}
} else {
eprintln!(
"Warning: included file which has not been preprocessed: {}",
path.display()
);
}
}
}
/// Extracts macro names and quoted include targets from a parsed file.
///
/// The syntax tree returned by `parser.get_root()` is walked iteratively with
/// an explicit stack. For each node:
///
/// * `Define` / `Undef`: if the first child is an `Identifier`, its text is
///   added to the file's macros unless `is_specials` accepts it.
/// * `PreprocInclude`: if the first child is a `StringLiteral`, the text
///   between its first and last byte (the quotes), trimmed of leading and
///   trailing spaces/tabs, is added to the file's direct includes. Empty,
///   blank, non-UTF-8 or malformed (shorter than two bytes) literals are
///   skipped.
///
/// All other node kinds are ignored. The collected data is stored in
/// `results.files` under `path`, replacing any previous entry for that path.
pub fn preprocess(parser: &PreprocParser, path: &Path, results: &mut PreprocResults) {
    let node = parser.get_root();
    let mut cursor = node.cursor();
    let mut stack = Vec::new();
    let code = parser.get_code();
    let mut file_result = PreprocFile::default();

    stack.push(node);

    while let Some(node) = stack.pop() {
        // Queue the children first, so the `continue`s below never skip them.
        cursor.reset(&node);
        if cursor.goto_first_child() {
            loop {
                stack.push(cursor.node());
                if !cursor.goto_next_sibling() {
                    break;
                }
            }
        }

        let id = Preproc::from(node.kind_id());
        match id {
            Preproc::Define | Preproc::Undef => {
                cursor.reset(&node);
                cursor.goto_first_child();
                let identifier = cursor.node();

                if identifier.kind_id() == Preproc::Identifier {
                    let Some(macro_text) = identifier.utf8_text(code) else {
                        continue;
                    };
                    if !is_specials(macro_text) {
                        file_result.macros.insert(macro_text.to_string());
                    }
                }
            }
            Preproc::PreprocInclude => {
                cursor.reset(&node);
                cursor.goto_first_child();
                let file = cursor.node();

                if file.kind_id() == Preproc::StringLiteral {
                    // Drop the first and last byte (the quotes). A literal
                    // shorter than two bytes has no interior; skip it instead
                    // of panicking on an out-of-order range.
                    let Some(inner_end) = file.end_byte().checked_sub(1) else {
                        continue;
                    };
                    let Some(file) = code.get(file.start_byte() + 1..inner_end) else {
                        continue;
                    };

                    // Trim surrounding spaces and tabs; skip blank literals.
                    let Some(start) = file.iter().position(|&c| c != b' ' && c != b'\t') else {
                        continue;
                    };
                    let Some(end) = file.iter().rposition(|&c| c != b' ' && c != b'\t') else {
                        continue;
                    };
                    let file = &file[start..=end];

                    let Ok(file) = std::str::from_utf8(file) else {
                        continue;
                    };
                    file_result.direct_includes.insert(file.to_string());
                }
            }
            _ => {}
        }
    }

    results.files.insert(path.to_path_buf(), file_result);
}
#[cfg(test)]
#[allow(
    clippy::float_cmp,
    clippy::cast_precision_loss,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::similar_names,
    clippy::doc_markdown,
    clippy::needless_raw_string_hashes,
    clippy::too_many_lines
)]
mod tests {
    use super::*;

    /// Path under which every single-file fixture is parsed and stored.
    const FIXTURE: &str = "test.h";

    /// Builds a preprocessor parser over `source`.
    fn parse(source: &str) -> PreprocParser {
        PreprocParser::new(source.as_bytes().to_vec(), &PathBuf::from(FIXTURE), None)
    }

    /// Runs `preprocess` on `source` and returns the entry stored for it.
    fn preprocess_source(source: &str) -> PreprocFile {
        let parser = parse(source);
        let mut results = PreprocResults::default();
        preprocess(&parser, Path::new(FIXTURE), &mut results);
        results
            .files
            .remove(Path::new(FIXTURE))
            .expect("file entry must be inserted")
    }

    /// Builds a file record whose only direct include is `target`.
    fn including(target: &str) -> PreprocFile {
        let mut pf = PreprocFile::default();
        pf.direct_includes.insert(target.to_string());
        pf
    }

    #[test]
    fn preprocess_empty_include_does_not_panic() {
        let pf = preprocess_source("#include \"\"\n");
        assert!(pf.direct_includes.is_empty());
    }

    #[test]
    fn preprocess_whitespace_only_include_does_not_panic() {
        let pf = preprocess_source("#include \" \"\n");
        assert!(pf.direct_includes.is_empty());
    }

    #[test]
    fn preprocess_valid_include_is_recorded() {
        let pf = preprocess_source("#include \" foo.h \"\n");
        assert!(pf.direct_includes.contains("foo.h"));
    }

    #[test]
    fn preprocess_define_records_macro() {
        let pf = preprocess_source("#define FOO 1\n");
        assert!(pf.macros.contains("FOO"));
    }

    #[test]
    fn fix_includes_handles_simple_cycle() {
        // a.h includes b.h and b.h includes a.h.
        let mut files: HashMap<PathBuf, PreprocFile> = HashMap::new();
        let mut all_files: HashMap<String, Vec<PathBuf>> = HashMap::new();
        for (name, target) in [("a.h", "b.h"), ("b.h", "a.h")] {
            files.insert(PathBuf::from(name), including(target));
            all_files.insert(name.to_string(), vec![PathBuf::from(name)]);
        }

        fix_includes(&mut files, &all_files);

        // Both members of the cycle must see both files.
        for (name, missing) in [
            ("a.h", "a.h must be retained"),
            ("b.h", "b.h must be retained"),
        ] {
            let pf = files.get(Path::new(name)).expect(missing);
            assert!(pf.indirect_includes.contains("a.h"));
            assert!(pf.indirect_includes.contains("b.h"));
        }
    }
}