Skip to main content

tsift_graph/
lib.rs

1use anyhow::Result;
2use lazily::{CellHandle, Context as LazyContext, SlotHandle};
3use serde::{Deserialize, Serialize};
4use std::cell::{Cell, RefCell};
5use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
6use std::path::{Path, PathBuf};
7use tree_sitter::{Parser, Query, QueryCursor, StreamingIterator};
8use tsift_core::{GraphEdge, GraphNode, GraphProjection, GraphProvenance};
9
10pub mod lang;
11pub use lang::{Lang, Symbol};
12
13pub mod complexity;
14pub use complexity::{ComplexityMetrics, LanguageExtractor, LanguageRegistry};
15
16#[derive(Debug, Clone, PartialEq, Eq)]
17pub struct CallSite {
18    pub callee: String,
19    pub line: usize,
20}
21
22#[derive(Debug, Clone, PartialEq, Eq)]
23pub struct CallEdge {
24    pub caller: String,
25    pub callee: String,
26    pub caller_line: usize,
27    pub call_site_line: usize,
28}
29
30#[derive(Debug, Clone, Copy, PartialEq, Eq)]
31pub struct FileMtime {
32    pub secs: i64,
33    pub nanos: u32,
34}
35
36impl FileMtime {
37    pub fn new(secs: i64, nanos: u32) -> Self {
38        Self { secs, nanos }
39    }
40}
41
42#[derive(Debug, Clone, PartialEq, Eq, Hash)]
43struct ResolveEdgesKey {
44    file: PathBuf,
45    content_hash: String,
46}
47
48#[derive(Clone, Copy)]
49struct ResolveEdgesSlot {
50    mtime: CellHandle<FileMtime>,
51    edges: SlotHandle<Vec<CallEdge>>,
52}
53
54pub struct ResolveEdgesCache {
55    ctx: LazyContext,
56    slots: RefCell<HashMap<ResolveEdgesKey, ResolveEdgesSlot>>,
57    hits: Cell<usize>,
58    misses: Cell<usize>,
59}
60
61impl Default for ResolveEdgesCache {
62    fn default() -> Self {
63        Self::new()
64    }
65}
66
67impl ResolveEdgesCache {
68    pub fn new() -> Self {
69        Self {
70            ctx: LazyContext::new(),
71            slots: RefCell::new(HashMap::new()),
72            hits: Cell::new(0),
73            misses: Cell::new(0),
74        }
75    }
76
77    pub fn resolve_edges_for_file(
78        &self,
79        file: &Path,
80        content_hash: &str,
81        mtime: FileMtime,
82        symbols: &[Symbol],
83        call_sites: &[CallSite],
84    ) -> Vec<CallEdge> {
85        let key = ResolveEdgesKey {
86            file: file.to_path_buf(),
87            content_hash: content_hash.to_string(),
88        };
89        let slot = {
90            let mut slots = self.slots.borrow_mut();
91            if let Some(slot) = slots.get(&key) {
92                self.ctx.set_cell(&slot.mtime, mtime);
93                *slot
94            } else {
95                let mtime_cell = self.ctx.cell(mtime);
96                let symbols = symbols.to_vec();
97                let call_sites = call_sites.to_vec();
98                let edges = self.ctx.slot(move |ctx| {
99                    let _mtime = ctx.get_cell(&mtime_cell);
100                    resolve_edges_uncached(&symbols, &call_sites)
101                });
102                let slot = ResolveEdgesSlot {
103                    mtime: mtime_cell,
104                    edges,
105                };
106                slots.insert(key, slot);
107                slot
108            }
109        };
110        if self.ctx.is_set(&slot.edges) {
111            self.hits.set(self.hits.get() + 1);
112        } else {
113            self.misses.set(self.misses.get() + 1);
114        }
115        self.ctx.get(&slot.edges)
116    }
117
118    pub fn stats(&self) -> (usize, usize) {
119        (self.hits.get(), self.misses.get())
120    }
121}
122
123#[derive(Debug, Clone, PartialEq, Eq)]
124pub struct RouteSite {
125    pub framework: String,
126    pub method: Option<String>,
127    pub path: String,
128    pub handler: String,
129    pub line: usize,
130    pub handler_line: Option<usize>,
131}
132
133#[derive(Debug, Clone)]
134struct PendingRoute {
135    framework: String,
136    method: Option<String>,
137    path: String,
138    line: usize,
139}
140
141pub fn extract_call_sites(lang: Lang, source: &[u8]) -> Result<Vec<CallSite>> {
142    let query_str = match lang.call_query() {
143        Some(q) => q,
144        None => return Ok(Vec::new()),
145    };
146    let mut parser = Parser::new();
147    let ts_lang = lang.tree_sitter_language();
148    parser.set_language(&ts_lang)?;
149    let tree = parser
150        .parse(source, None)
151        .ok_or_else(|| anyhow::anyhow!("parse failed"))?;
152    let query = Query::new(&ts_lang, query_str)?;
153    let mut cursor = QueryCursor::new();
154    let mut sites = Vec::new();
155    let capture_names: Vec<String> = query
156        .capture_names()
157        .iter()
158        .map(|s| s.to_string())
159        .collect();
160
161    let mut matches = cursor.matches(&query, tree.root_node(), source);
162    while let Some(m) = matches.next() {
163        for capture in m.captures {
164            let name = &capture_names[capture.index as usize];
165            if name == "call.name" {
166                let callee = capture
167                    .node
168                    .utf8_text(source)
169                    .unwrap_or("<invalid utf8>")
170                    .to_string();
171                sites.push(CallSite {
172                    callee,
173                    line: capture.node.start_position().row,
174                });
175            }
176        }
177    }
178    Ok(sites)
179}
180
181pub fn source_content_hash(source: &[u8]) -> String {
182    blake3::hash(source).to_hex().to_string()
183}
184
185pub fn extract_route_sites(lang: Lang, source: &[u8]) -> Result<Vec<RouteSite>> {
186    let text = std::str::from_utf8(source)?;
187    Ok(match lang {
188        #[cfg(feature = "lang-rust")]
189        Lang::Rust => extract_rust_routes(text),
190        #[cfg(feature = "lang-python")]
191        Lang::Python => extract_python_routes(text),
192        #[cfg(feature = "lang-typescript")]
193        Lang::TypeScript | Lang::Tsx => extract_typescript_routes(text),
194        #[cfg(feature = "lang-javascript")]
195        Lang::JavaScript | Lang::Jsx => extract_typescript_routes(text),
196        _ => Vec::new(),
197    })
198}
199
200fn extract_string_literal(input: &str) -> Option<(String, usize)> {
201    let mut chars = input.char_indices();
202    while let Some((start, ch)) = chars.next() {
203        if ch != '"' && ch != '\'' {
204            continue;
205        }
206        let quote = ch;
207        let mut escaped = false;
208        let mut value = String::new();
209        for (offset, current) in chars.by_ref() {
210            if escaped {
211                value.push(current);
212                escaped = false;
213                continue;
214            }
215            if current == '\\' {
216                escaped = true;
217                continue;
218            }
219            if current == quote {
220                return Some((value, offset + current.len_utf8()));
221            }
222            value.push(current);
223        }
224        return Some((input[start + quote.len_utf8()..].to_string(), input.len()));
225    }
226    None
227}
228
229fn first_identifier(input: &str) -> Option<String> {
230    let mut start = None;
231    for (idx, ch) in input.char_indices() {
232        if start.is_none() {
233            if ch == '_' || ch.is_ascii_alphabetic() {
234                start = Some(idx);
235            }
236            continue;
237        }
238        if !(ch == '_' || ch.is_ascii_alphanumeric()) {
239            let value = input[start.unwrap()..idx].to_string();
240            return (!is_handler_keyword(&value)).then_some(value);
241        }
242    }
243    start
244        .map(|idx| input[idx..].to_string())
245        .filter(|value| !is_handler_keyword(value))
246}
247
248fn is_handler_keyword(value: &str) -> bool {
249    matches!(
250        value,
251        "async" | "await" | "function" | "move" | "None" | "Some" | "lambda"
252    )
253}
254
255fn route_methods() -> &'static [&'static str] {
256    &[
257        "get", "post", "put", "patch", "delete", "head", "options", "any", "route",
258    ]
259}
260
261fn parse_wrapped_handler(input: &str) -> (Option<String>, Option<String>) {
262    for method in route_methods() {
263        let needle = format!("{method}(");
264        if let Some(pos) = input.find(&needle) {
265            let inside = &input[pos + needle.len()..];
266            return (
267                Some((*method).to_string()),
268                first_identifier(inside).or_else(|| Some("<inline>".to_string())),
269            );
270        }
271    }
272    (None, first_identifier(input))
273}
274
275fn parse_rust_fn_name(line: &str) -> Option<String> {
276    let pos = line.find("fn ")?;
277    first_identifier(&line[pos + 3..])
278}
279
280fn parse_route_attribute(line: &str, framework: &str) -> Option<PendingRoute> {
281    let trimmed = line.trim_start();
282    let rest = trimmed.strip_prefix("#[")?;
283    for method in route_methods() {
284        let Some(method_rest) = rest.strip_prefix(method) else {
285            continue;
286        };
287        if !method_rest.trim_start().starts_with('(') {
288            continue;
289        }
290        let (path, _) = extract_string_literal(method_rest)?;
291        return Some(PendingRoute {
292            framework: framework.to_string(),
293            method: Some((*method).to_string()),
294            path,
295            line: 0,
296        });
297    }
298    None
299}
300
301fn extract_rust_routes(text: &str) -> Vec<RouteSite> {
302    let mut routes = Vec::new();
303    let mut pending = Vec::<PendingRoute>::new();
304
305    for (line_idx, line) in text.lines().enumerate() {
306        if let Some(mut attr) = parse_route_attribute(line, "actix") {
307            attr.line = line_idx;
308            if let Some(handler) = parse_rust_fn_name(line) {
309                routes.push(RouteSite {
310                    framework: attr.framework,
311                    method: attr.method,
312                    path: attr.path,
313                    handler,
314                    line: attr.line,
315                    handler_line: Some(line_idx),
316                });
317            } else {
318                pending.push(attr);
319            }
320        } else if !pending.is_empty()
321            && let Some(handler) = parse_rust_fn_name(line)
322        {
323            for attr in pending.drain(..) {
324                routes.push(RouteSite {
325                    framework: attr.framework,
326                    method: attr.method,
327                    path: attr.path,
328                    handler: handler.clone(),
329                    line: attr.line,
330                    handler_line: Some(line_idx),
331                });
332            }
333        }
334
335        if let Some(route_pos) = line.find(".route(") {
336            let route_args = &line[route_pos + ".route(".len()..];
337            if let Some((path, end_offset)) = extract_string_literal(route_args) {
338                let args_after_path = &route_args[end_offset..];
339                let (method, handler) = parse_wrapped_handler(args_after_path);
340                if let Some(handler) = handler {
341                    routes.push(RouteSite {
342                        framework: "axum".to_string(),
343                        method: method.or_else(|| Some("route".to_string())),
344                        path,
345                        handler,
346                        line: line_idx,
347                        handler_line: None,
348                    });
349                }
350            }
351        }
352    }
353
354    routes
355}
356
357fn parse_python_def_name(line: &str) -> Option<String> {
358    let trimmed = line.trim_start();
359    let rest = trimmed
360        .strip_prefix("async def ")
361        .or_else(|| trimmed.strip_prefix("def "))?;
362    first_identifier(rest)
363}
364
365fn parse_python_route_decorator(line: &str) -> Option<PendingRoute> {
366    let trimmed = line.trim_start();
367    let rest = trimmed.strip_prefix('@')?;
368    let dot = rest.find('.')?;
369    let after_dot = &rest[dot + 1..];
370    for method in route_methods() {
371        let Some(method_rest) = after_dot.strip_prefix(method) else {
372            continue;
373        };
374        if !method_rest.trim_start().starts_with('(') {
375            continue;
376        }
377        let (path, _) = extract_string_literal(method_rest)?;
378        let framework = if *method == "route" {
379            "flask"
380        } else {
381            "fastapi"
382        };
383        return Some(PendingRoute {
384            framework: framework.to_string(),
385            method: Some((*method).to_string()),
386            path,
387            line: 0,
388        });
389    }
390    None
391}
392
393fn extract_python_routes(text: &str) -> Vec<RouteSite> {
394    let mut routes = Vec::new();
395    let mut pending = Vec::<PendingRoute>::new();
396
397    for (line_idx, line) in text.lines().enumerate() {
398        if let Some(mut route) = parse_python_route_decorator(line) {
399            route.line = line_idx;
400            pending.push(route);
401            continue;
402        }
403
404        if !pending.is_empty()
405            && let Some(handler) = parse_python_def_name(line)
406        {
407            for route in pending.drain(..) {
408                routes.push(RouteSite {
409                    framework: route.framework,
410                    method: route.method,
411                    path: route.path,
412                    handler: handler.clone(),
413                    line: route.line,
414                    handler_line: Some(line_idx),
415                });
416            }
417        }
418    }
419
420    routes
421}
422
423fn parse_ts_method_name(line: &str) -> Option<String> {
424    let trimmed = line.trim_start();
425    first_identifier(trimmed)
426}
427
428fn parse_ts_route_decorator(line: &str) -> Option<PendingRoute> {
429    let trimmed = line.trim_start();
430    let rest = trimmed.strip_prefix('@')?;
431    for method in route_methods() {
432        let mut chars = method.chars();
433        let title = match chars.next() {
434            Some(first) => format!("{}{}", first.to_ascii_uppercase(), chars.as_str()),
435            None => continue,
436        };
437        let Some(method_rest) = rest.strip_prefix(&title) else {
438            continue;
439        };
440        if !method_rest.trim_start().starts_with('(') {
441            continue;
442        }
443        let (path, _) = extract_string_literal(method_rest)?;
444        return Some(PendingRoute {
445            framework: "nestjs".to_string(),
446            method: Some((*method).to_string()),
447            path,
448            line: 0,
449        });
450    }
451    None
452}
453
454fn parse_ts_router_call(line: &str, line_idx: usize) -> Option<RouteSite> {
455    for method in route_methods() {
456        if *method == "route" {
457            continue;
458        }
459        let needle = format!(".{method}(");
460        let Some(pos) = line.find(&needle) else {
461            continue;
462        };
463        let args = &line[pos + needle.len()..];
464        let (path, end_offset) = extract_string_literal(args)?;
465        let handler = args[end_offset..]
466            .split_once(',')
467            .and_then(|(_, rest)| first_identifier(rest))
468            .unwrap_or_else(|| "<inline>".to_string());
469        return Some(RouteSite {
470            framework: "express".to_string(),
471            method: Some((*method).to_string()),
472            path,
473            handler,
474            line: line_idx,
475            handler_line: None,
476        });
477    }
478    None
479}
480
481fn extract_typescript_routes(text: &str) -> Vec<RouteSite> {
482    let mut routes = Vec::new();
483    let mut pending = Vec::<PendingRoute>::new();
484
485    for (line_idx, line) in text.lines().enumerate() {
486        if let Some(mut route) = parse_ts_route_decorator(line) {
487            route.line = line_idx;
488            pending.push(route);
489            continue;
490        }
491
492        if !pending.is_empty()
493            && let Some(handler) = parse_ts_method_name(line)
494        {
495            for route in pending.drain(..) {
496                routes.push(RouteSite {
497                    framework: route.framework,
498                    method: route.method,
499                    path: route.path,
500                    handler: handler.clone(),
501                    line: route.line,
502                    handler_line: Some(line_idx),
503                });
504            }
505        }
506
507        if let Some(route) = parse_ts_router_call(line, line_idx) {
508            routes.push(route);
509        }
510    }
511
512    routes
513}
514
515pub fn resolve_edges(symbols: &[Symbol], call_sites: &[CallSite]) -> Vec<CallEdge> {
516    resolve_edges_uncached(symbols, call_sites)
517}
518
519fn resolve_edges_uncached(symbols: &[Symbol], call_sites: &[CallSite]) -> Vec<CallEdge> {
520    let mut edges = Vec::new();
521    for site in call_sites {
522        let caller = symbols
523            .iter()
524            .filter(|s| s.kind == "function" || s.kind == "class" || s.kind == "mod")
525            .filter(|s| site.line >= s.line && site.line <= s.end_line)
526            .min_by_key(|s| s.end_line - s.line);
527        if let Some(caller) = caller {
528            edges.push(CallEdge {
529                caller: caller.name.clone(),
530                callee: site.callee.clone(),
531                caller_line: caller.line,
532                call_site_line: site.line,
533            });
534        }
535    }
536    edges
537}
538
539pub fn code_symbol_node_id(name: &str) -> String {
540    format!("code.symbol:{name}")
541}
542
543pub fn code_route_node_id(framework: &str, method: Option<&str>, path: &str) -> String {
544    format!(
545        "code.route:{}:{}:{}",
546        framework,
547        method.unwrap_or("any"),
548        path
549    )
550}
551
552pub fn project_call_edges(
553    edges: &[CallEdge],
554    provenance: Option<GraphProvenance>,
555) -> GraphProjection {
556    let mut nodes = BTreeMap::<String, GraphNode>::new();
557    let mut projected_edges = Vec::with_capacity(edges.len());
558
559    for edge in edges {
560        let caller_id = code_symbol_node_id(&edge.caller);
561        let callee_id = code_symbol_node_id(&edge.callee);
562        for (id, label) in [(&caller_id, &edge.caller), (&callee_id, &edge.callee)] {
563            nodes.entry(id.clone()).or_insert_with(|| {
564                let mut node = GraphNode::new(id.clone(), "code_symbol", label.clone());
565                if let Some(provenance) = provenance.clone() {
566                    node = node.with_provenance(provenance);
567                }
568                node
569            });
570        }
571
572        let mut projected = GraphEdge::new(caller_id, callee_id, "calls")
573            .with_property("caller_line", edge.caller_line.to_string())
574            .with_property("call_site_line", edge.call_site_line.to_string());
575        if let Some(provenance) = provenance.clone() {
576            projected = projected.with_provenance(provenance);
577        }
578        projected_edges.push(projected);
579    }
580
581    GraphProjection {
582        nodes: nodes.into_values().collect(),
583        edges: projected_edges,
584    }
585}
586
587pub fn project_routes(
588    routes: &[RouteSite],
589    provenance: Option<GraphProvenance>,
590) -> GraphProjection {
591    let mut nodes = BTreeMap::<String, GraphNode>::new();
592    let mut projected_edges = Vec::with_capacity(routes.len());
593
594    for route in routes {
595        let route_id = code_route_node_id(&route.framework, route.method.as_deref(), &route.path);
596        let handler_id = code_symbol_node_id(&route.handler);
597        let mut route_node = GraphNode::new(
598            route_id.clone(),
599            "route",
600            format!(
601                "{} {}",
602                route.method.as_deref().unwrap_or("any").to_uppercase(),
603                route.path
604            ),
605        )
606        .with_property("framework", route.framework.clone())
607        .with_property("path", route.path.clone())
608        .with_property("handler", route.handler.clone())
609        .with_property("line", route.line.to_string());
610        if let Some(method) = &route.method {
611            route_node = route_node.with_property("method", method.clone());
612        }
613        if let Some(provenance) = provenance.clone() {
614            route_node = route_node.with_provenance(provenance);
615        }
616        nodes.entry(route_id.clone()).or_insert(route_node);
617
618        nodes.entry(handler_id.clone()).or_insert_with(|| {
619            let mut node = GraphNode::new(handler_id.clone(), "code_symbol", route.handler.clone());
620            if let Some(provenance) = provenance.clone() {
621                node = node.with_provenance(provenance);
622            }
623            node
624        });
625
626        let mut edge = GraphEdge::new(route_id, handler_id, "handled_by")
627            .with_property("route_path", route.path.clone())
628            .with_property("framework", route.framework.clone());
629        if let Some(method) = &route.method {
630            edge = edge.with_property("method", method.clone());
631        }
632        if let Some(provenance) = provenance.clone() {
633            edge = edge.with_provenance(provenance);
634        }
635        projected_edges.push(edge);
636    }
637
638    GraphProjection {
639        nodes: nodes.into_values().collect(),
640        edges: projected_edges,
641    }
642}
643
644#[derive(Debug, Clone, Serialize, Deserialize)]
645pub struct CommunityMemberRef {
646    pub file: String,
647    pub line: i64,
648    pub role: String,
649    pub peer: String,
650}
651
652#[derive(Debug, Clone, Serialize, Deserialize)]
653pub struct CommunityMember {
654    pub name: String,
655    #[serde(skip_serializing_if = "Option::is_none", default)]
656    pub file: Option<String>,
657    #[serde(skip_serializing_if = "Option::is_none", default)]
658    pub line: Option<i64>,
659    #[serde(skip_serializing_if = "Vec::is_empty", default)]
660    pub refs: Vec<CommunityMemberRef>,
661    #[serde(skip_serializing_if = "Option::is_none", default)]
662    pub tagpath_handle: Option<String>,
663}
664
665impl CommunityMember {
666    pub fn new(name: impl Into<String>) -> Self {
667        Self {
668            name: name.into(),
669            file: None,
670            line: None,
671            refs: Vec::new(),
672            tagpath_handle: None,
673        }
674    }
675}
676
677#[derive(Debug, Clone, Serialize, Deserialize)]
678pub struct TerseCommunityMember {
679    pub name: String,
680    #[serde(skip_serializing_if = "Option::is_none", default)]
681    pub tagpath_handle: Option<String>,
682}
683
684impl From<&CommunityMember> for TerseCommunityMember {
685    fn from(m: &CommunityMember) -> Self {
686        Self {
687            name: m.name.clone(),
688            tagpath_handle: m.tagpath_handle.clone(),
689        }
690    }
691}
692
693#[derive(Debug, Clone, Serialize, Deserialize)]
694pub struct TerseCommunity {
695    pub id: usize,
696    pub members: Vec<TerseCommunityMember>,
697    pub modularity_contribution: f64,
698}
699
700impl TerseCommunity {
701    pub fn from_community(community: &Community, top_n: usize) -> Self {
702        let members: Vec<TerseCommunityMember> = community
703            .members
704            .iter()
705            .take(top_n)
706            .map(TerseCommunityMember::from)
707            .collect();
708        Self {
709            id: community.id,
710            members,
711            modularity_contribution: community.modularity_contribution,
712        }
713    }
714}
715
716#[derive(Debug, Clone, Serialize, Deserialize)]
717pub struct Community {
718    pub id: usize,
719    pub members: Vec<CommunityMember>,
720    pub modularity_contribution: f64,
721}
722
723#[derive(Debug, Clone, Serialize, Deserialize)]
724pub struct CommunityResult {
725    pub communities: Vec<Community>,
726    pub modularity: f64,
727    pub iterations: usize,
728    pub node_count: usize,
729    pub edge_count: usize,
730}
731
732#[derive(Debug, Clone, Serialize, Deserialize)]
733pub struct TerseCommunityResult {
734    pub communities: Vec<TerseCommunity>,
735    pub modularity: f64,
736    pub iterations: usize,
737    pub node_count: usize,
738    pub edge_count: usize,
739}
740
741impl CommunityResult {
742    pub fn to_terse(&self, top_n: usize) -> TerseCommunityResult {
743        TerseCommunityResult {
744            communities: self
745                .communities
746                .iter()
747                .map(|c| TerseCommunity::from_community(c, top_n))
748                .collect(),
749            modularity: self.modularity,
750            iterations: self.iterations,
751            node_count: self.node_count,
752            edge_count: self.edge_count,
753        }
754    }
755}
756
757struct LouvainGraph {
758    n: usize,
759    adj: Vec<HashMap<usize, f64>>,
760    degree: Vec<f64>,
761    m: f64,
762}
763
764impl LouvainGraph {
765    fn from_indexed(n: usize, adj: Vec<HashSet<usize>>) -> Self {
766        let degree: Vec<f64> = adj.iter().map(|nb| nb.len() as f64).collect();
767        let m = degree.iter().sum::<f64>() / 2.0;
768        let weighted: Vec<HashMap<usize, f64>> = adj
769            .iter()
770            .map(|nb| nb.iter().map(|&j| (j, 1.0_f64)).collect())
771            .collect();
772        Self {
773            n,
774            adj: weighted,
775            degree,
776            m,
777        }
778    }
779
780    fn phase1(&self) -> (Vec<usize>, usize, bool) {
781        let n = self.n;
782        let m = self.m;
783        let mut community: Vec<usize> = (0..n).collect();
784        let mut comm_degree = self.degree.clone();
785        let mut ki_in: Vec<HashMap<usize, f64>> = (0..n)
786            .map(|i| {
787                let mut map = HashMap::new();
788                for (&nb, &w) in &self.adj[i] {
789                    *map.entry(community[nb]).or_insert(0.0) += w;
790                }
791                map
792            })
793            .collect();
794
795        let mut iterations = 0;
796        let mut any_improved = false;
797        loop {
798            let mut improved = false;
799            iterations += 1;
800
801            for i in 0..n {
802                let cur_c = community[i];
803                let ki = self.degree[i];
804
805                let ki_in_cur = ki_in[i].get(&cur_c).copied().unwrap_or(0.0);
806                let cur_gain = ki_in_cur / m - ki * (comm_degree[cur_c] - ki) / (2.0 * m * m);
807
808                let mut best_delta = 0.0f64;
809                let mut best_c = cur_c;
810
811                for (&c, &ki_in_c) in &ki_in[i] {
812                    if c == cur_c {
813                        continue;
814                    }
815                    let target_gain = ki_in_c / m - ki * comm_degree[c] / (2.0 * m * m);
816                    let delta = target_gain - cur_gain;
817                    if delta > best_delta {
818                        best_delta = delta;
819                        best_c = c;
820                    }
821                }
822
823                if best_c != cur_c {
824                    comm_degree[cur_c] -= ki;
825                    comm_degree[best_c] += ki;
826                    for (&nb, &w) in &self.adj[i] {
827                        ki_in[nb].entry(cur_c).and_modify(|v| *v -= w).or_insert(-w);
828                        *ki_in[nb].entry(best_c).or_insert(0.0) += w;
829                    }
830                    community[i] = best_c;
831                    improved = true;
832                    any_improved = true;
833                }
834            }
835
836            if !improved || iterations >= 100 {
837                break;
838            }
839        }
840        (community, iterations, any_improved)
841    }
842
843    fn coarsen(&self, community: &[usize]) -> LouvainGraph {
844        let mut remap = HashMap::new();
845        for &c in community {
846            if !remap.contains_key(&c) {
847                let idx = remap.len();
848                remap.insert(c, idx);
849            }
850        }
851        let n2 = remap.len();
852        let mut adj2: Vec<HashMap<usize, f64>> = vec![HashMap::new(); n2];
853
854        for i in 0..self.n {
855            let ci = remap[&community[i]];
856            for (&j, &w) in &self.adj[i] {
857                let cj = remap[&community[j]];
858                if ci == cj {
859                    *adj2[ci].entry(ci).or_insert(0.0) += w / 2.0;
860                } else {
861                    *adj2[ci].entry(cj).or_insert(0.0) += w;
862                }
863            }
864        }
865
866        LouvainGraph::from_weighted(n2, adj2)
867    }
868
869    #[allow(dead_code)]
870    fn from_weighted(n: usize, adj: Vec<HashMap<usize, f64>>) -> Self {
871        let degree: Vec<f64> = (0..n).map(|i| adj[i].values().sum::<f64>()).collect();
872        let m = degree.iter().sum::<f64>() / 2.0;
873        Self { n, adj, degree, m }
874    }
875}
876
877pub fn detect_communities(edges: &[(String, String)]) -> CommunityResult {
878    if edges.is_empty() {
879        return CommunityResult {
880            communities: Vec::new(),
881            modularity: 0.0,
882            iterations: 0,
883            node_count: 0,
884            edge_count: 0,
885        };
886    }
887
888    let mut node_vec: Vec<String> = Vec::new();
889    let mut node_idx: HashMap<String, usize> = HashMap::new();
890    for (a, b) in edges {
891        for name in [a, b] {
892            if !node_idx.contains_key(name) {
893                node_idx.insert(name.clone(), node_vec.len());
894                node_vec.push(name.clone());
895            }
896        }
897    }
898    let n = node_vec.len();
899
900    let mut adj: Vec<HashSet<usize>> = vec![HashSet::new(); n];
901    for (a, b) in edges {
902        let ai = node_idx[a];
903        let bi = node_idx[b];
904        if ai != bi {
905            adj[ai].insert(bi);
906            adj[bi].insert(ai);
907        }
908    }
909
910    let m = adj.iter().map(|nb| nb.len() as f64).sum::<f64>() / 2.0;
911
912    if m == 0.0 {
913        let communities = node_vec
914            .iter()
915            .enumerate()
916            .map(|(i, name)| Community {
917                id: i,
918                members: vec![CommunityMember::new(name.clone())],
919                modularity_contribution: 0.0,
920            })
921            .collect();
922        return CommunityResult {
923            communities,
924            modularity: 0.0,
925            iterations: 0,
926            node_count: n,
927            edge_count: 0,
928        };
929    }
930
931    let graph = LouvainGraph::from_indexed(n, adj);
932    let mut total_iterations = 0;
933    let original_degrees: Vec<f64> = graph.degree.clone();
934
935    let (community, iter1, _) = graph.phase1();
936    total_iterations += iter1;
937
938    let mut level_assignment = community;
939    let mut current_graph = graph;
940
941    for _level in 0..10 {
942        let coarse = current_graph.coarsen(&level_assignment);
943        if coarse.n == current_graph.n {
944            break;
945        }
946        let (coarse_community, iters, improved) = coarse.phase1();
947        total_iterations += iters;
948        if !improved {
949            break;
950        }
951
952        let mut remap = HashMap::new();
953        for &c in &level_assignment {
954            if !remap.contains_key(&c) {
955                let idx = remap.len();
956                remap.insert(c, idx);
957            }
958        }
959
960        let mut final_community = vec![0usize; n];
961        for i in 0..n {
962            let coarse_node = remap[&level_assignment[i]];
963            final_community[i] = coarse_community[coarse_node];
964        }
965
966        let mut final_remap = HashMap::new();
967        let mut next_id = 0usize;
968        for c in &final_community {
969            if let Some(&_id) = final_remap.get(c) {
970                continue;
971            }
972            final_remap.insert(*c, next_id);
973            next_id += 1;
974        }
975        for i in 0..n {
976            final_community[i] = final_remap[&final_community[i]];
977        }
978
979        level_assignment = final_community;
980        current_graph = coarse;
981    }
982
983    let community = level_assignment;
984
985    let mut node_to_comm: HashMap<String, usize> = HashMap::new();
986    for (i, &c) in community.iter().enumerate() {
987        node_to_comm.insert(node_vec[i].clone(), c);
988    }
989
990    let mut comm_members: HashMap<usize, Vec<String>> = HashMap::new();
991    let mut comm_internal: HashMap<usize, f64> = HashMap::new();
992    let mut comm_degree_map: HashMap<usize, f64> = HashMap::new();
993
994    for (i, &c) in community.iter().enumerate() {
995        comm_members.entry(c).or_default().push(node_vec[i].clone());
996        *comm_degree_map.entry(c).or_insert(0.0) += original_degrees[i];
997    }
998    for (a, b) in edges {
999        let ca = node_to_comm[a];
1000        let cb = node_to_comm[b];
1001        if ca == cb {
1002            *comm_internal.entry(ca).or_insert(0.0) += 1.0;
1003        }
1004    }
1005
1006    let mut total_modularity = 0.0;
1007    let mut communities: Vec<Community> = comm_members
1008        .into_iter()
1009        .map(|(id, mut members)| {
1010            members.sort();
1011            let lc = comm_internal.get(&id).copied().unwrap_or(0.0);
1012            let dc = comm_degree_map[&id];
1013            let mod_contrib = lc / m - (dc / (2.0 * m)).powi(2);
1014            total_modularity += mod_contrib;
1015            Community {
1016                id,
1017                members: members.into_iter().map(CommunityMember::new).collect(),
1018                modularity_contribution: mod_contrib,
1019            }
1020        })
1021        .collect();
1022
1023    communities.sort_by(|a, b| b.members.len().cmp(&a.members.len()).then(a.id.cmp(&b.id)));
1024
1025    CommunityResult {
1026        communities,
1027        modularity: total_modularity,
1028        iterations: total_iterations,
1029        node_count: n,
1030        edge_count: m as usize,
1031    }
1032}
1033
1034#[derive(Debug, Clone, Serialize)]
1035pub struct PathNode {
1036    pub name: String,
1037    #[serde(skip_serializing_if = "Option::is_none", default)]
1038    pub tagpath_handle: Option<String>,
1039}
1040
1041impl PathNode {
1042    pub fn new(name: impl Into<String>) -> Self {
1043        Self {
1044            name: name.into(),
1045            tagpath_handle: None,
1046        }
1047    }
1048}
1049
1050#[derive(Debug, Clone, Serialize)]
1051pub struct PathResult {
1052    pub from: String,
1053    pub to: String,
1054    pub path: Vec<PathNode>,
1055    pub hops: usize,
1056}
1057
1058pub fn shortest_path(edges: &[(String, String)], from: &str, to: &str) -> Option<PathResult> {
1059    if from == to {
1060        return Some(PathResult {
1061            from: from.to_string(),
1062            to: to.to_string(),
1063            path: vec![PathNode::new(from)],
1064            hops: 0,
1065        });
1066    }
1067
1068    let mut adj: HashMap<&str, HashSet<&str>> = HashMap::new();
1069    for (a, b) in edges {
1070        if a == b {
1071            continue;
1072        }
1073        adj.entry(a.as_str()).or_default().insert(b.as_str());
1074        adj.entry(b.as_str()).or_default().insert(a.as_str());
1075    }
1076
1077    if !adj.contains_key(from) || !adj.contains_key(to) {
1078        return None;
1079    }
1080
1081    let mut visited: HashSet<&str> = HashSet::new();
1082    let mut queue: VecDeque<&str> = VecDeque::new();
1083    let mut parent: HashMap<&str, &str> = HashMap::new();
1084
1085    visited.insert(from);
1086    queue.push_back(from);
1087
1088    while let Some(current) = queue.pop_front() {
1089        if let Some(neighbors) = adj.get(current) {
1090            for &neighbor in neighbors {
1091                if visited.insert(neighbor) {
1092                    parent.insert(neighbor, current);
1093                    if neighbor == to {
1094                        let mut path = vec![PathNode::new(to)];
1095                        let mut curr = to;
1096                        while let Some(&p) = parent.get(curr) {
1097                            path.push(PathNode::new(p));
1098                            curr = p;
1099                        }
1100                        path.reverse();
1101                        let hops = path.len() - 1;
1102                        return Some(PathResult {
1103                            from: from.to_string(),
1104                            to: to.to_string(),
1105                            path,
1106                            hops,
1107                        });
1108                    }
1109                    queue.push_back(neighbor);
1110                }
1111            }
1112        }
1113    }
1114
1115    None
1116}
1117
1118#[cfg(test)]
1119mod tests {
1120    use super::*;
1121
1122    #[cfg(feature = "lang-rust")]
1123    #[test]
1124    fn rust_direct_call() {
1125        let source = b"fn helper() {}\nfn main() { helper(); }";
1126        let sites = extract_call_sites(Lang::Rust, source).unwrap();
1127        assert!(
1128            sites.iter().any(|s| s.callee == "helper"),
1129            "got: {:?}",
1130            sites
1131        );
1132    }
1133
1134    #[cfg(feature = "lang-rust")]
1135    #[test]
1136    fn rust_method_call() {
1137        let source = b"fn main() { vec.push(1); }";
1138        let sites = extract_call_sites(Lang::Rust, source).unwrap();
1139        assert!(sites.iter().any(|s| s.callee == "push"), "got: {:?}", sites);
1140    }
1141
1142    #[cfg(feature = "lang-rust")]
1143    #[test]
1144    fn rust_scoped_call() {
1145        let source = b"fn main() { Vec::new(); }";
1146        let sites = extract_call_sites(Lang::Rust, source).unwrap();
1147        assert!(sites.iter().any(|s| s.callee == "new"), "got: {:?}", sites);
1148    }
1149
1150    #[cfg(feature = "lang-rust")]
1151    #[test]
1152    fn rust_macro_call() {
1153        let source = b"fn main() { println!(\"hi\"); }";
1154        let sites = extract_call_sites(Lang::Rust, source).unwrap();
1155        assert!(
1156            sites.iter().any(|s| s.callee == "println"),
1157            "got: {:?}",
1158            sites
1159        );
1160    }
1161
1162    #[cfg(feature = "lang-rust")]
1163    #[test]
1164    fn rust_axum_route_extracted() {
1165        let source = br#"fn router() {
1166    Router::new().route("/users", get(list_users));
1167}
1168fn list_users() {}
1169"#;
1170        let routes = extract_route_sites(Lang::Rust, source).unwrap();
1171        assert!(routes.iter().any(|route| {
1172            route.framework == "axum"
1173                && route.method.as_deref() == Some("get")
1174                && route.path == "/users"
1175                && route.handler == "list_users"
1176        }));
1177    }
1178
1179    #[cfg(feature = "lang-rust")]
1180    #[test]
1181    fn rust_actix_route_attribute_extracted() {
1182        let source = br#"#[post("/submit")]
1183async fn submit_form() {}
1184"#;
1185        let routes = extract_route_sites(Lang::Rust, source).unwrap();
1186        assert_eq!(routes.len(), 1);
1187        assert_eq!(routes[0].framework, "actix");
1188        assert_eq!(routes[0].method.as_deref(), Some("post"));
1189        assert_eq!(routes[0].handler, "submit_form");
1190    }
1191
1192    #[cfg(feature = "lang-python")]
1193    #[test]
1194    fn python_direct_call() {
1195        let source = b"def helper(): pass\ndef main(): helper()";
1196        let sites = extract_call_sites(Lang::Python, source).unwrap();
1197        assert!(
1198            sites.iter().any(|s| s.callee == "helper"),
1199            "got: {:?}",
1200            sites
1201        );
1202    }
1203
1204    #[cfg(feature = "lang-python")]
1205    #[test]
1206    fn python_method_call() {
1207        let source = b"def main(): obj.method()";
1208        let sites = extract_call_sites(Lang::Python, source).unwrap();
1209        assert!(
1210            sites.iter().any(|s| s.callee == "method"),
1211            "got: {:?}",
1212            sites
1213        );
1214    }
1215
1216    #[cfg(feature = "lang-python")]
1217    #[test]
1218    fn python_fastapi_route_extracted() {
1219        let source = br#"@router.get("/items/{item_id}")
1220def read_item(item_id: str):
1221    return item_id
1222"#;
1223        let routes = extract_route_sites(Lang::Python, source).unwrap();
1224        assert_eq!(routes.len(), 1);
1225        assert_eq!(routes[0].framework, "fastapi");
1226        assert_eq!(routes[0].method.as_deref(), Some("get"));
1227        assert_eq!(routes[0].path, "/items/{item_id}");
1228        assert_eq!(routes[0].handler, "read_item");
1229    }
1230
1231    #[cfg(feature = "lang-typescript")]
1232    #[test]
1233    fn typescript_direct_call() {
1234        let source = b"function helper() {}\nfunction main() { helper(); }";
1235        let sites = extract_call_sites(Lang::TypeScript, source).unwrap();
1236        assert!(
1237            sites.iter().any(|s| s.callee == "helper"),
1238            "got: {:?}",
1239            sites
1240        );
1241    }
1242
1243    #[cfg(feature = "lang-typescript")]
1244    #[test]
1245    fn typescript_method_call() {
1246        let source = b"function main() { arr.push(1); }";
1247        let sites = extract_call_sites(Lang::TypeScript, source).unwrap();
1248        assert!(sites.iter().any(|s| s.callee == "push"), "got: {:?}", sites);
1249    }
1250
1251    #[cfg(feature = "lang-typescript")]
1252    #[test]
1253    fn typescript_express_route_extracted() {
1254        let source = br#"router.post("/users", createUser);
1255function createUser() {}
1256"#;
1257        let routes = extract_route_sites(Lang::TypeScript, source).unwrap();
1258        assert_eq!(routes.len(), 1);
1259        assert_eq!(routes[0].framework, "express");
1260        assert_eq!(routes[0].method.as_deref(), Some("post"));
1261        assert_eq!(routes[0].path, "/users");
1262        assert_eq!(routes[0].handler, "createUser");
1263    }
1264
1265    #[cfg(feature = "lang-javascript")]
1266    #[test]
1267    fn javascript_call() {
1268        let source = b"function main() { helper(); obj.method(); }";
1269        let sites = extract_call_sites(Lang::JavaScript, source).unwrap();
1270        assert!(
1271            sites.iter().any(|s| s.callee == "helper"),
1272            "got: {:?}",
1273            sites
1274        );
1275        assert!(
1276            sites.iter().any(|s| s.callee == "method"),
1277            "got: {:?}",
1278            sites
1279        );
1280    }
1281
1282    #[cfg(feature = "lang-rust")]
1283    fn test_symbol(name: &str, line: usize, end_line: usize) -> Symbol {
1284        Symbol {
1285            name: name.into(),
1286            kind: "function".into(),
1287            line,
1288            end_line,
1289            node_kind: "function_item".into(),
1290            start_byte: line,
1291            end_byte: end_line,
1292            body_start_byte: None,
1293            body_end_byte: None,
1294        }
1295    }
1296
1297    #[cfg(feature = "lang-rust")]
1298    #[test]
1299    fn resolve_edges_basic() {
1300        let symbols = vec![test_symbol("main", 1, 3), test_symbol("helper", 5, 7)];
1301        let sites = vec![
1302            CallSite {
1303                callee: "helper".into(),
1304                line: 2,
1305            },
1306            CallSite {
1307                callee: "println".into(),
1308                line: 6,
1309            },
1310        ];
1311        let edges = resolve_edges(&symbols, &sites);
1312        assert_eq!(edges.len(), 2);
1313        assert_eq!(edges[0].caller, "main");
1314        assert_eq!(edges[0].callee, "helper");
1315        assert_eq!(edges[1].caller, "helper");
1316        assert_eq!(edges[1].callee, "println");
1317    }
1318
1319    #[cfg(feature = "lang-rust")]
1320    #[test]
1321    fn resolve_edges_nested_picks_innermost() {
1322        let symbols = vec![test_symbol("outer", 0, 10), test_symbol("inner", 2, 5)];
1323        let sites = vec![CallSite {
1324            callee: "foo".into(),
1325            line: 3,
1326        }];
1327        let edges = resolve_edges(&symbols, &sites);
1328        assert_eq!(edges.len(), 1);
1329        assert_eq!(edges[0].caller, "inner");
1330    }
1331
1332    #[cfg(feature = "lang-rust")]
1333    #[test]
1334    fn resolve_edges_top_level_call_excluded() {
1335        let symbols = vec![test_symbol("main", 5, 10)];
1336        let sites = vec![CallSite {
1337            callee: "foo".into(),
1338            line: 2,
1339        }];
1340        let edges = resolve_edges(&symbols, &sites);
1341        assert!(edges.is_empty());
1342    }
1343
1344    #[test]
1345    fn resolve_edges_cache_reuses_slots_until_mtime_or_hash_changes() {
1346        let cache = ResolveEdgesCache::new();
1347        let file = std::path::Path::new("src/lib.rs");
1348        let symbols = vec![test_symbol("main", 1, 3)];
1349        let sites = vec![CallSite {
1350            callee: "helper".into(),
1351            line: 2,
1352        }];
1353
1354        let first =
1355            cache.resolve_edges_for_file(file, "hash-a", FileMtime::new(10, 0), &symbols, &sites);
1356        assert_eq!(first.len(), 1);
1357        assert_eq!(cache.stats(), (0, 1));
1358
1359        let cached =
1360            cache.resolve_edges_for_file(file, "hash-a", FileMtime::new(10, 0), &symbols, &sites);
1361        assert_eq!(cached, first);
1362        assert_eq!(cache.stats(), (1, 1));
1363
1364        let refreshed =
1365            cache.resolve_edges_for_file(file, "hash-a", FileMtime::new(11, 0), &symbols, &sites);
1366        assert_eq!(refreshed, first);
1367        assert_eq!(cache.stats(), (1, 2));
1368
1369        let new_hash =
1370            cache.resolve_edges_for_file(file, "hash-b", FileMtime::new(11, 0), &symbols, &sites);
1371        assert_eq!(new_hash, first);
1372        assert_eq!(cache.stats(), (1, 3));
1373    }
1374
1375    #[test]
1376    fn project_call_edges_to_provider_neutral_substrate() {
1377        let edges = vec![CallEdge {
1378            caller: "main".into(),
1379            callee: "helper".into(),
1380            caller_line: 10,
1381            call_site_line: 12,
1382        }];
1383        let projection = project_call_edges(
1384            &edges,
1385            Some(GraphProvenance::new("tsift.index", "src/main.rs")),
1386        );
1387
1388        assert_eq!(projection.nodes.len(), 2);
1389        assert_eq!(projection.edges.len(), 1);
1390        assert!(
1391            projection
1392                .nodes
1393                .iter()
1394                .any(|node| node.id == code_symbol_node_id("main") && node.kind == "code_symbol")
1395        );
1396    }
1397
1398    #[test]
1399    fn project_routes_to_provider_neutral_substrate() {
1400        let routes = vec![RouteSite {
1401            framework: "fastapi".into(),
1402            method: Some("get".into()),
1403            path: "/items".into(),
1404            handler: "list_items".into(),
1405            line: 3,
1406            handler_line: Some(4),
1407        }];
1408        let projection = project_routes(
1409            &routes,
1410            Some(GraphProvenance::new("tsift.index", "src/api.py")),
1411        );
1412
1413        assert!(
1414            projection
1415                .nodes
1416                .iter()
1417                .any(|node| node.kind == "route" && node.label == "GET /items")
1418        );
1419        assert!(projection.edges.iter().any(|edge| edge.kind == "handled_by"
1420            && edge.properties.get("route_path") == Some(&"/items".to_string())));
1421    }
1422
1423    #[test]
1424    fn no_call_query_returns_empty() {
1425        #[cfg(feature = "lang-markdown")]
1426        {
1427            let sites = extract_call_sites(Lang::Markdown, b"# Hello").unwrap();
1428            assert!(sites.is_empty());
1429        }
1430    }
1431
1432    #[cfg(feature = "lang-rust")]
1433    #[test]
1434    fn full_roundtrip_rust() {
1435        let source = b"fn helper() { println!(\"hi\"); }\nfn main() { helper(); Vec::new(); }";
1436        let symbols = Lang::Rust.extract_symbols(source).unwrap();
1437        let sites = extract_call_sites(Lang::Rust, source).unwrap();
1438        let edges = resolve_edges(&symbols, &sites);
1439        let main_calls: Vec<&str> = edges
1440            .iter()
1441            .filter(|e| e.caller == "main")
1442            .map(|e| e.callee.as_str())
1443            .collect();
1444        assert!(
1445            main_calls.contains(&"helper"),
1446            "main should call helper, got: {:?}",
1447            main_calls
1448        );
1449        assert!(
1450            main_calls.contains(&"new"),
1451            "main should call new, got: {:?}",
1452            main_calls
1453        );
1454    }
1455
1456    fn s(a: &str, b: &str) -> (String, String) {
1457        (a.to_string(), b.to_string())
1458    }
1459
1460    #[test]
1461    fn communities_empty_graph() {
1462        let result = detect_communities(&[]);
1463        assert_eq!(result.node_count, 0);
1464        assert_eq!(result.edge_count, 0);
1465        assert!(result.communities.is_empty());
1466        assert_eq!(result.iterations, 0);
1467    }
1468
1469    #[test]
1470    fn communities_single_edge() {
1471        let edges = vec![s("a", "b")];
1472        let result = detect_communities(&edges);
1473        assert_eq!(result.node_count, 2);
1474        assert_eq!(result.edge_count, 1);
1475        assert_eq!(result.communities.len(), 1);
1476        assert_eq!(result.communities[0].members.len(), 2);
1477    }
1478
1479    #[test]
1480    fn communities_self_loop_ignored() {
1481        let edges = vec![s("a", "a"), s("a", "b")];
1482        let result = detect_communities(&edges);
1483        assert_eq!(result.node_count, 2);
1484        assert_eq!(result.edge_count, 1);
1485    }
1486
1487    #[test]
1488    fn communities_duplicate_edges_deduplicated() {
1489        let edges = vec![
1490            s("main", "helper"),
1491            s("main", "helper"),
1492            s("main", "helper"),
1493        ];
1494        let result = detect_communities(&edges);
1495        assert_eq!(result.node_count, 2);
1496        assert_eq!(result.edge_count, 1);
1497    }
1498
1499    #[test]
1500    fn communities_two_cliques_split() {
1501        let edges = vec![
1502            s("a", "b"),
1503            s("a", "c"),
1504            s("b", "c"),
1505            s("d", "e"),
1506            s("d", "f"),
1507            s("e", "f"),
1508            s("a", "d"),
1509        ];
1510        let result = detect_communities(&edges);
1511        assert_eq!(result.node_count, 6);
1512        assert_eq!(
1513            result.communities.len(),
1514            2,
1515            "expected 2 communities, got: {:?}",
1516            result
1517                .communities
1518                .iter()
1519                .map(|c| &c.members)
1520                .collect::<Vec<_>>()
1521        );
1522        assert_eq!(result.communities[0].members.len(), 3);
1523        assert_eq!(result.communities[1].members.len(), 3);
1524        assert!(result.modularity > 0.0);
1525    }
1526
1527    #[test]
1528    fn communities_disconnected_components() {
1529        let edges = vec![s("a", "b"), s("c", "d")];
1530        let result = detect_communities(&edges);
1531        assert_eq!(result.node_count, 4);
1532        assert_eq!(result.edge_count, 2);
1533        assert!(result.modularity >= 0.0);
1534    }
1535
1536    #[test]
1537    fn communities_modularity_non_negative_for_clustered() {
1538        let edges = vec![
1539            s("a", "b"),
1540            s("a", "c"),
1541            s("b", "c"),
1542            s("d", "e"),
1543            s("d", "f"),
1544            s("e", "f"),
1545        ];
1546        let result = detect_communities(&edges);
1547        assert!(result.modularity >= 0.0, "Q={}", result.modularity);
1548    }
1549
1550    #[test]
1551    fn communities_hierarchical_phase2_improves_modularity() {
1552        let mut edges = Vec::new();
1553        for cluster in 0..4 {
1554            let base = cluster * 6;
1555            for i in 0..6 {
1556                for j in (i + 1)..6 {
1557                    edges.push((
1558                        format!("c{}n{}", cluster, base + i),
1559                        format!("c{}n{}", cluster, base + j),
1560                    ));
1561                }
1562            }
1563        }
1564        edges.push(("c0n0".to_string(), "c1n6".to_string()));
1565        edges.push(("c2n12".to_string(), "c3n18".to_string()));
1566        edges.push(("c0n1".to_string(), "c2n12".to_string()));
1567
1568        let result = detect_communities(&edges);
1569        assert!(result.modularity > 0.0, "Q={}", result.modularity);
1570        assert!(
1571            result.communities.len() >= 2,
1572            "expected >= 2 communities for hierarchical structure, got {}",
1573            result.communities.len()
1574        );
1575        assert!(result.iterations >= 1);
1576    }
1577
1578    fn path_names(result: &PathResult) -> Vec<&str> {
1579        result.path.iter().map(|n| n.name.as_str()).collect()
1580    }
1581
1582    #[test]
1583    fn path_direct_neighbors() {
1584        let edges = vec![s("a", "b")];
1585        let result = shortest_path(&edges, "a", "b").unwrap();
1586        assert_eq!(path_names(&result), vec!["a", "b"]);
1587        assert_eq!(result.hops, 1);
1588        assert!(result.path.iter().all(|n| n.tagpath_handle.is_none()));
1589    }
1590
1591    #[test]
1592    fn path_two_hops() {
1593        let edges = vec![s("a", "b"), s("b", "c")];
1594        let result = shortest_path(&edges, "a", "c").unwrap();
1595        assert_eq!(result.hops, 2);
1596        assert_eq!(result.path.first().unwrap().name, "a");
1597        assert_eq!(result.path.last().unwrap().name, "c");
1598    }
1599
1600    #[test]
1601    fn path_same_node() {
1602        let edges = vec![s("a", "b")];
1603        let result = shortest_path(&edges, "a", "a").unwrap();
1604        assert_eq!(path_names(&result), vec!["a"]);
1605        assert_eq!(result.hops, 0);
1606    }
1607
1608    #[test]
1609    fn path_no_connection() {
1610        let edges = vec![s("a", "b"), s("c", "d")];
1611        assert!(shortest_path(&edges, "a", "c").is_none());
1612    }
1613
1614    #[test]
1615    fn path_unknown_node() {
1616        let edges = vec![s("a", "b")];
1617        assert!(shortest_path(&edges, "a", "z").is_none());
1618    }
1619
1620    #[test]
1621    fn path_prefers_shorter() {
1622        let edges = vec![s("a", "b"), s("b", "c"), s("a", "c")];
1623        let result = shortest_path(&edges, "a", "c").unwrap();
1624        assert_eq!(result.hops, 1);
1625    }
1626
1627    #[test]
1628    fn path_self_loop_ignored() {
1629        let edges = vec![s("a", "a"), s("a", "b")];
1630        let result = shortest_path(&edges, "a", "b").unwrap();
1631        assert_eq!(result.hops, 1);
1632    }
1633
1634    #[test]
1635    fn terse_community_drops_optional_fields() {
1636        let member = CommunityMember {
1637            name: "foo".to_string(),
1638            file: Some("src/lib.rs".to_string()),
1639            line: Some(42),
1640            refs: vec![CommunityMemberRef {
1641                file: "src/lib.rs".to_string(),
1642                line: 42,
1643                role: "call".to_string(),
1644                peer: "bar".to_string(),
1645            }],
1646            tagpath_handle: Some("foo::lib".to_string()),
1647        };
1648        let terse = TerseCommunityMember::from(&member);
1649        assert_eq!(terse.name, "foo");
1650        assert_eq!(terse.tagpath_handle, Some("foo::lib".to_string()));
1651    }
1652
1653    #[test]
1654    fn terse_community_top_n_truncates_members() {
1655        let community = Community {
1656            id: 0,
1657            members: vec![
1658                CommunityMember::new("a"),
1659                CommunityMember::new("b"),
1660                CommunityMember::new("c"),
1661                CommunityMember::new("d"),
1662                CommunityMember::new("e"),
1663            ],
1664            modularity_contribution: 0.25,
1665        };
1666        let terse = TerseCommunity::from_community(&community, 3);
1667        assert_eq!(terse.id, 0);
1668        assert_eq!(terse.members.len(), 3);
1669        assert_eq!(terse.members[0].name, "a");
1670        assert_eq!(terse.members[2].name, "c");
1671        assert_eq!(terse.modularity_contribution, 0.25);
1672    }
1673
1674    #[test]
1675    fn terse_community_result_from_detect_communities() {
1676        let edges = vec![s("a", "b"), s("b", "c"), s("c", "d")];
1677        let result = detect_communities(&edges);
1678        let terse = result.to_terse(2);
1679        assert_eq!(terse.node_count, result.node_count);
1680        assert_eq!(terse.edge_count, result.edge_count);
1681        assert_eq!(terse.modularity, result.modularity);
1682        assert_eq!(terse.communities.len(), result.communities.len());
1683        for tc in &terse.communities {
1684            assert!(tc.members.len() <= 2);
1685        }
1686    }
1687
1688    #[test]
1689    fn terse_community_json_smaller_than_full() {
1690        let edges: Vec<(String, String)> = (0..20)
1691            .flat_map(|i| {
1692                let base = i * 5;
1693                vec![
1694                    (format!("n{}", base), format!("n{}", base + 1)),
1695                    (format!("n{}", base), format!("n{}", base + 2)),
1696                    (format!("n{}", base + 1), format!("n{}", base + 2)),
1697                    (format!("n{}", base + 2), format!("n{}", base + 3)),
1698                    (format!("n{}", base + 3), format!("n{}", base + 4)),
1699                ]
1700            })
1701            .chain(std::iter::once(("n0".to_string(), "n5".to_string())))
1702            .collect();
1703        let result = detect_communities(&edges);
1704        let terse = result.to_terse(2);
1705        let full_member_count: usize = result.communities.iter().map(|c| c.members.len()).sum();
1706        let terse_member_count: usize = terse.communities.iter().map(|c| c.members.len()).sum();
1707        assert!(
1708            terse_member_count < full_member_count,
1709            "terse members ({}) should be fewer than full ({})",
1710            terse_member_count,
1711            full_member_count
1712        );
1713    }
1714}