use std::path::PathBuf;
use anyhow::Result;
use crate::graph::Graph;
#[cfg(feature = "infomap")]
pub mod clustering;
#[cfg(feature = "infomap")]
pub mod labeling;
#[cfg(feature = "infomap")]
pub mod integration;
#[cfg(feature = "cli-llm")]
mod llm;
#[cfg(feature = "cli-llm")]
pub use llm::CliLlm;
#[cfg(feature = "infomap")]
pub use clustering::{
auto_config, auto_config_with_network, auto_name, auto_name_hierarchical,
add_co_citation_edges, add_dir_colocation_edges, add_symbol_similarity_edges,
build_network, cluster, map_to_components, relation_weight, run_clustering,
identify_hubs, exclude_hubs_from_network,
ClusterConfig, ClusterMetrics, ClusterResult, RawCluster,
WEIGHT_CALLS, WEIGHT_CO_CITATION, WEIGHT_DEPENDS_ON, WEIGHT_DIR_COLOCATION, WEIGHT_IMPORTS,
WEIGHT_STRUCTURAL, WEIGHT_SYMBOL_SIMILARITY, WEIGHT_TYPE_REF,
COLOCATION_PAIRWISE_LIMIT, CO_CITATION_MIN_SHARED,
SYMBOL_MIN_SHARED_TOKENS, SYMBOL_MIN_JACCARD,
};
#[cfg(feature = "infomap")]
pub use integration::{merge_into_graph, rollback_infer_batch, format_output, InferResult, MergeStats, OutputFormat};
#[cfg(feature = "infomap")]
pub use labeling::{LabelingConfig, LabelingResult, SimpleLlm};
/// Selects how much of the inference pipeline `run` executes.
///
/// The default level is `All`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum InferLevel {
    /// Clustering only: `run` skips the labeling phase and uses an empty
    /// labeling result.
    Component,
    /// Clustering followed by labeling.
    ///
    /// `run` in this module handles this level exactly like `All`.
    Feature,
    /// Clustering followed by labeling (the default).
    #[default]
    All,
}
/// Options for a single `run` invocation.
#[cfg(feature = "infomap")]
#[derive(Debug, Clone)]
pub struct InferConfig {
/// Clustering settings. When `min_community_size` equals the value from
/// `ClusterConfig::default()`, `run` substitutes an auto-tuned configuration
/// and only carries over `max_cluster_size` (if set) and `hierarchical` (if true).
pub clustering: ClusterConfig,
/// Labeling settings. `None` makes `run` fall back to `LabelingConfig::default()`.
pub labeling: Option<LabelingConfig>,
/// Which phases to execute; `InferLevel::Component` skips labeling.
pub level: InferLevel,
/// Requested output format.
/// NOTE(review): not read by `run` in this file — presumably consumed by the
/// caller when rendering the result; confirm.
pub format: OutputFormat,
/// NOTE(review): not read by `run` in this file — presumably tells the caller
/// not to persist the inferred result; confirm.
pub dry_run: bool,
/// Directory that `run` extracts code nodes from when the input graph has none.
/// If the graph has no code nodes and this is `None`, `run` returns an error.
pub source_dir: Option<PathBuf>,
}
/// Default options: default clustering and labeling settings, every level
/// (`InferLevel::All`), summary output, no dry run, and no source directory.
#[cfg(feature = "infomap")]
impl Default for InferConfig {
    fn default() -> Self {
        let clustering = ClusterConfig::default();
        let labeling = Some(LabelingConfig::default());

        Self {
            clustering,
            labeling,
            level: InferLevel::All,
            format: OutputFormat::Summary,
            dry_run: false,
            source_dir: None,
        }
    }
}
/// Runs the inference pipeline on `graph`: clustering, then (depending on
/// `config.level`) labeling, and combines both phases into an `InferResult`.
///
/// Steps:
/// 1. If `graph` contains no code nodes, code is extracted from
///    `config.source_dir` and added to a working *clone* of the graph; the
///    caller's `graph` is never modified.
/// 2. The clustering configuration is resolved. When
///    `config.clustering.min_community_size` still equals the default, an
///    auto-tuned configuration is used instead, keeping only the caller's
///    `max_cluster_size` (if set) and `hierarchical` (if true).
/// 3. Clustering runs; if it yields no nodes, an empty result with the message
///    "No communities detected" is returned.
/// 4. Labeling runs for `InferLevel::Feature` and `InferLevel::All`; it is
///    skipped for `InferLevel::Component`.
///
/// `llm` is forwarded unchanged to the labeling phase.
///
/// # Errors
///
/// Returns an error when `graph` has no code nodes and `config.source_dir` is
/// `None`, and propagates any error from the clustering or labeling phase.
#[cfg(feature = "infomap")]
pub async fn run(
    graph: &Graph,
    config: &InferConfig,
    llm: Option<&dyn SimpleLlm>,
) -> Result<InferResult> {
    // Deferred initialisation: this binding is only assigned on the
    // auto-extract path. Declaring it here lets the extracted graph outlive
    // the `if` so `effective_graph` can borrow either it or the caller's graph.
    let extracted_graph: Graph;
    let effective_graph: &Graph = if graph.code_nodes().is_empty() {
        use crate::code_graph::CodeGraph;
        use crate::unify::codegraph_to_graph_nodes;

        let source_dir = config.source_dir.as_ref().ok_or_else(|| {
            anyhow::anyhow!(
                "No code layer in graph. Run `gid extract` first or pass --source <dir>."
            )
        })?;

        tracing::info!(
            source_dir = ?source_dir,
            "No code nodes found, auto-extracting"
        );

        // NOTE(review): this call is synchronous inside an async fn; if it
        // performs blocking filesystem work, consider moving it to a blocking
        // task — confirm against `CodeGraph::extract_from_dir`.
        let code_graph = CodeGraph::extract_from_dir(source_dir);
        let (code_nodes, code_edges) = codegraph_to_graph_nodes(&code_graph, source_dir);

        let mut working = graph.clone();
        for node in code_nodes {
            working.add_node(node);
        }
        for edge in code_edges {
            working.add_edge_dedup(edge);
        }

        extracted_graph = working;
        &extracted_graph
    } else {
        graph
    };

    // An unchanged `min_community_size` is treated as "caller did not tune
    // clustering", which enables auto-configuration.
    let use_auto_config =
        config.clustering.min_community_size == ClusterConfig::default().min_community_size;

    let effective_clustering_config = if use_auto_config {
        // Count file-level nodes: either `node_type == "file"`, or
        // `node_type == "code"` with `node_kind == "File"`.
        let file_count = effective_graph
            .nodes
            .iter()
            .filter(|n| {
                matches!(
                    (n.node_type.as_deref(), n.node_kind.as_deref()),
                    (Some("file"), _) | (Some("code"), Some("File"))
                )
            })
            .count();

        let (net, _) = clustering::build_network(effective_graph);
        let mut auto = clustering::auto_config_with_network(file_count, &net);

        // Explicit caller overrides survive auto-configuration.
        if let Some(limit) = config.clustering.max_cluster_size {
            auto.max_cluster_size = Some(limit);
        }
        if config.clustering.hierarchical {
            auto.hierarchical = true;
        }
        auto
    } else {
        config.clustering.clone()
    };

    let cluster_result = clustering::cluster(effective_graph, &effective_clustering_config)?;
    if cluster_result.nodes.is_empty() {
        return Ok(InferResult::empty("No communities detected"));
    }

    let labeling_result = match config.level {
        InferLevel::Component => LabelingResult::empty(),
        InferLevel::Feature | InferLevel::All => {
            // Only materialise the labeling config when labeling actually runs.
            let labeling_config = config.labeling.clone().unwrap_or_default();
            labeling::label(effective_graph, &cluster_result, llm, labeling_config).await?
        }
    };
    tracing::debug!(
        labels = labeling_result.component_labels.len(),
        features = labeling_result.features.len(),
        feature_edges = labeling_result.feature_edges.len(),
        "Labeling result"
    );

    let result = InferResult::from_phases(&cluster_result, &labeling_result);
    tracing::debug!(
        components = result.component_nodes.len(),
        features = result.feature_nodes.len(),
        edges = result.edges.len(),
        "InferResult"
    );

    Ok(result)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The default inference level covers every phase.
    #[test]
    fn test_infer_level_default() {
        let level = InferLevel::default();
        assert_eq!(level, InferLevel::All);
    }

    /// `InferConfig::default()` exposes the documented defaults.
    #[cfg(feature = "infomap")]
    #[test]
    fn test_infer_config_default() {
        let InferConfig {
            level,
            dry_run,
            source_dir,
            labeling,
            format,
            ..
        } = InferConfig::default();

        assert_eq!(level, InferLevel::All);
        assert!(!dry_run);
        assert!(source_dir.is_none());
        assert!(labeling.is_some());
        assert_eq!(format, OutputFormat::Summary);
    }

    /// An empty graph plus a `source_dir` triggers auto-extraction instead of
    /// failing.
    #[cfg(feature = "infomap")]
    #[tokio::test]
    async fn test_auto_extract_trigger() {
        use std::path::PathBuf;

        let crate_src = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("src");
        let empty_graph = crate::graph::Graph::new();
        let config = InferConfig {
            source_dir: Some(crate_src),
            level: InferLevel::Component,
            labeling: None,
            ..InferConfig::default()
        };

        let outcome = run(&empty_graph, &config, None).await;
        assert!(
            outcome.is_ok(),
            "run() with empty graph + source_dir should auto-extract: {:?}",
            outcome.err(),
        );

        let inferred = outcome.unwrap();
        // `all` is vacuously true when there are no component nodes, so the
        // empty case needs no separate check.
        let every_component_inferred = inferred
            .component_nodes
            .iter()
            .all(|node| node.source.as_deref() == Some("infer"));
        assert!(
            every_component_inferred,
            "All component nodes should have source=infer",
        );
    }

    /// An empty graph without a `source_dir` is reported as an error that
    /// mentions the missing code layer.
    #[cfg(feature = "infomap")]
    #[tokio::test]
    async fn test_auto_extract_no_source() {
        let empty_graph = crate::graph::Graph::new();
        let config = InferConfig {
            source_dir: None,
            ..InferConfig::default()
        };

        let outcome = run(&empty_graph, &config, None).await;
        assert!(outcome.is_err(), "run() with empty graph + no source_dir should error");

        let message = outcome.unwrap_err().to_string();
        assert!(
            message.contains("No code layer"),
            "Error should mention 'No code layer', got: {}",
            message,
        );
    }
}