impl DataScienceAnalyzer {
    /// Runs PageRank over the file-dependency graph and writes each
    /// finding's score into `finding.pagerank`.
    ///
    /// `dependencies` holds directed `(from_file, to_file)` edges, keyed by
    /// the same string form as `finding.location.file.to_string_lossy()`.
    /// Findings whose file never appears in the graph keep `pagerank = None`.
    /// No-op when either input is empty.
    // Truncation allow: node ids are cast usize -> u32 for trueno_graph's
    // NodeId; assumes graphs stay well under u32::MAX nodes.
    #[allow(clippy::cast_possible_truncation)]
    #[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
    pub fn calculate_pagerank(&self, findings: &mut [Finding], dependencies: &[(String, String)]) {
        if findings.is_empty() || dependencies.is_empty() {
            return;
        }
        // Assign a dense node id to every distinct file mentioned by a
        // finding or a dependency edge. The entry API performs one hash
        // lookup per file (the old contains_key + insert pattern did two).
        let mut file_to_node: HashMap<String, usize> = HashMap::new();
        let mut next_id = 0usize;
        let mut intern = |file: String| {
            if let std::collections::hash_map::Entry::Vacant(e) = file_to_node.entry(file) {
                e.insert(next_id);
                next_id += 1;
            }
        };
        for finding in findings.iter() {
            intern(finding.location.file.to_string_lossy().to_string());
        }
        for (from, to) in dependencies {
            intern(from.clone());
            intern(to.clone());
        }
        // `findings` is non-empty, so at least one node was interned; the
        // old `node_id == 0` early return was unreachable and is dropped.
        let mut graph = trueno_graph::CsrGraph::new();
        for (from, to) in dependencies {
            if let (Some(&from_id), Some(&to_id)) =
                (file_to_node.get(from), file_to_node.get(to))
            {
                graph.set_node_name(trueno_graph::NodeId(from_id as u32), from.clone());
                graph.set_node_name(trueno_graph::NodeId(to_id as u32), to.clone());
                // Unweighted dependency graph: every edge carries weight 1.0.
                // add_edge failures are deliberately best-effort ignored, as
                // before.
                let _ = graph.add_edge(
                    trueno_graph::NodeId(from_id as u32),
                    trueno_graph::NodeId(to_id as u32),
                    1.0,
                );
            }
        }
        // 20 iterations / 1e-6 tolerance; bail out silently if PageRank
        // fails, leaving all findings untouched (unchanged behavior).
        let scores = match trueno_graph::pagerank(&graph, 20, 1e-6) {
            Ok(s) => s,
            Err(_) => return,
        };
        for finding in findings.iter_mut() {
            let file = finding.location.file.to_string_lossy().to_string();
            // Checked `.get(node)` replaces the manual `node < scores.len()`
            // index guard.
            if let Some(&score) = file_to_node.get(&file).and_then(|&n| scores.get(n)) {
                finding.pagerank = Some(score as f32);
            }
        }
    }

    /// Detects file communities via Louvain clustering on the dependency
    /// graph, labels each finding's `community` field with
    /// `"community_<id>"`, and returns one `CodeCommunity` summary per
    /// non-empty community.
    ///
    /// Falls back to one-community-per-file (see
    /// [`Self::build_file_communities`]) when there are no dependency edges.
    /// Returns an empty vec when there are no findings.
    #[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
    pub fn detect_communities(
        &self,
        findings: &mut [Finding],
        dependencies: &[(String, String)],
    ) -> Vec<CodeCommunity> {
        if findings.is_empty() {
            return Vec::new();
        }
        // Bidirectional file <-> node-id maps. `intern` assigns ids with a
        // single hash lookup via the entry API.
        let mut file_to_node: HashMap<String, usize> = HashMap::new();
        let mut node_to_file: HashMap<usize, String> = HashMap::new();
        let mut next_id = 0usize;
        let mut intern = |file: &str| {
            if let std::collections::hash_map::Entry::Vacant(e) =
                file_to_node.entry(file.to_owned())
            {
                node_to_file.insert(next_id, file.to_owned());
                e.insert(next_id);
                next_id += 1;
            }
        };
        for finding in findings.iter() {
            intern(&finding.location.file.to_string_lossy());
        }
        // `findings` is non-empty, so at least one node exists; the old
        // `node_id == 0` half of this guard was redundant.
        if dependencies.is_empty() {
            return self.build_file_communities(findings);
        }
        for (from, to) in dependencies {
            intern(from);
            intern(to);
        }
        // By construction every endpoint was interned; filter_map keeps the
        // edge-building robust anyway.
        let edges: Vec<(usize, usize)> = dependencies
            .iter()
            .filter_map(|(from, to)| Some((*file_to_node.get(from)?, *file_to_node.get(to)?)))
            .collect();
        let graph = aprender::graph::Graph::from_edges(&edges, false);
        let community_assignments = graph.louvain();
        // Invert communities -> node lists into node -> community id.
        let mut node_to_community: HashMap<usize, usize> = HashMap::new();
        for (comm_id, nodes) in community_assignments.iter().enumerate() {
            for &node in nodes {
                node_to_community.insert(node, comm_id);
            }
        }
        // Stamp each finding with its community label.
        for finding in findings.iter_mut() {
            let file = finding.location.file.to_string_lossy().to_string();
            if let Some(&comm) = file_to_node.get(&file).and_then(|n| node_to_community.get(n)) {
                finding.community = Some(format!("community_{}", comm));
            }
        }
        // Tally defect count and per-category counts per community label in
        // ONE pass over findings. The original rescanned all findings twice
        // per community and rebuilt the `format!` label inside each closure:
        // O(communities x findings) with an allocation per comparison.
        let mut tallies: HashMap<&str, (usize, HashMap<&str, usize>)> = HashMap::new();
        for f in findings.iter() {
            if let Some(label) = f.community.as_deref() {
                let (count, categories) = tallies.entry(label).or_default();
                *count += 1;
                *categories.entry(&f.category).or_insert(0) += 1;
            }
        }
        community_assignments
            .iter()
            .enumerate()
            .filter(|(_, nodes)| !nodes.is_empty())
            .map(|(comm_id, nodes)| {
                let label = format!("community_{}", comm_id);
                let files: Vec<PathBuf> = nodes
                    .iter()
                    .filter_map(|n| node_to_file.get(n))
                    .map(PathBuf::from)
                    .collect();
                // Look up the precomputed tallies; a community with no
                // matching findings gets count 0 and no primary issue, as
                // before.
                let (defect_count, primary_issue) = match tallies.get(label.as_str()) {
                    Some((count, categories)) => (
                        *count,
                        categories
                            .iter()
                            .max_by_key(|&(_, c)| *c)
                            .map(|(cat, _)| (*cat).to_string()),
                    ),
                    None => (0, None),
                };
                CodeCommunity {
                    name: label,
                    // NOTE(review): modularity is not computed here and is
                    // reported as 0.0, matching prior behavior.
                    modularity: 0.0,
                    files,
                    primary_issue,
                    defect_count,
                }
            })
            .collect()
    }

    /// Fallback when no dependency edges exist: each file becomes its own
    /// single-file community (modularity 1.0), named after its file name.
    /// Also sets every finding's `community` to its own file path.
    fn build_file_communities(&self, findings: &mut [Finding]) -> Vec<CodeCommunity> {
        // Label every finding with its own file path as the community name.
        for finding in findings.iter_mut() {
            let file = finding.location.file.to_string_lossy().to_string();
            finding.community = Some(file);
        }
        // Group findings by file.
        let mut file_groups: HashMap<String, Vec<&Finding>> = HashMap::new();
        for finding in findings.iter() {
            let file = finding.location.file.to_string_lossy().to_string();
            file_groups.entry(file).or_default().push(finding);
        }
        file_groups
            .into_iter()
            .map(|(file, group)| {
                // Most frequent category in this file becomes the primary
                // issue (ties resolved by map iteration order, as before).
                let mut category_counts: HashMap<&str, usize> = HashMap::new();
                for f in &group {
                    *category_counts.entry(&f.category).or_insert(0) += 1;
                }
                let primary_issue = category_counts
                    .into_iter()
                    .max_by_key(|&(_, c)| c)
                    .map(|(cat, _)| cat.to_string());
                // Build the path once (the original re-parsed it for `files`)
                // and clone the fallback name lazily instead of the old
                // eager `unwrap_or(file.clone())`.
                let path = PathBuf::from(&file);
                let name = path
                    .file_name()
                    .map_or_else(|| file.clone(), |n| n.to_string_lossy().to_string());
                CodeCommunity {
                    name,
                    modularity: 1.0,
                    files: vec![path],
                    primary_issue,
                    defect_count: group.len(),
                }
            })
            .collect()
    }
}