ripvec_core/entry_points.rs
1//! Per-language entry-point detection for the `find_dead_code` MCP tool
2//! (4.1.0).
3//!
4//! The trait [`EntryPointDetector`] and its per-language implementors
5//! ([`RustEntryDetector`], [`PythonEntryDetector`], [`GoEntryDetector`])
6//! identify the syntactic shapes that act as roots of the call graph: the
7//! BFS reachability walk for dead-code detection seeds from the union of
8//! all [`EntryPoint`]s emitted across the indexed corpus.
9//!
10//! This module is X1 of the 4.1.0 series; the actual reachability walk and
11//! cluster discovery (`RepoGraph::compute_dead_code`) lands in X2. The MCP
12//! tool wrapper lands in X3. The remaining language detectors land in X4.
13//! See `docs/FIND_DEAD_CODE_DESIGN.md` Section 2 for the per-language
14//! entry-point survey and Section 3 for the algorithm that consumes this
15//! output.
16//!
17//! ## Type B (Wired-Stub) self-audit note
18//!
19//! Until X2 lands, every public item in this module is consumed only from
20//! the integration tests under `crates/ripvec-core/tests/entry_points.rs`.
21//! `scripts/check_wiring_gaps.sh` will report these as Type B findings.
22//! The findings are **explicitly deferred** to X2 — see the Section 9
23//! PLAN.md entry — not silently dangling. Do not annotate with
24//! `#[doc(hidden)]`: the doc-visibility surface is part of the X2 contract
25//! and is the intended public API of the dead-code module.
26
27use std::path::{Path, PathBuf};
28
29use streaming_iterator::StreamingIterator;
30use tree_sitter::{Node, Parser, Query, QueryCursor};
31
/// Reason an item is treated as a root of the dead-code reachability walk.
///
/// The variants follow the categories of Section 2 of
/// `docs/FIND_DEAD_CODE_DESIGN.md`. A kind describes a single *detection*
/// rather than a [`Definition`](crate::repo_map::Definition): one `pub fn`
/// may be reported both as [`EntryPointKind::Main`] (for binaries) and as
/// [`EntryPointKind::LibraryExport`] (for libraries), depending on how the
/// containing crate is laid out. Downstream consumers (X2) handle every
/// detection independently.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum EntryPointKind {
    /// Binary-style `main` entry: Rust `fn main()`, Go `func main()`, or
    /// Python's `if __name__ == "__main__"` block.
    Main,

    /// Public-API surface: `pub` re-exports in Rust libraries, names listed
    /// in Python's `__all__`, capitalised names in Go libraries.
    LibraryExport,

    /// Test entry: Rust `#[test]` / `#[bench]`, Python `def test_*` /
    /// `*_test.py`, Go `func TestX` / `BenchmarkX` / `ExampleX` / `FuzzX`.
    Test,

    /// Foreign-function-interface entry: Rust `#[no_mangle]` /
    /// `extern "C"`, Go cgo `//export`.
    Ffi,

    /// Procedural-macro entry: Rust `#[proc_macro]`,
    /// `#[proc_macro_derive]`, `#[proc_macro_attribute]`.
    ProcMacro,

    /// Package-initialisation entry: Go `func init()`.
    Init,

    /// Build-script entry: Cargo's `build.rs`.
    BuildScript,

    /// Method called through a framework-generated dispatcher that the
    /// static call graph cannot see — for example rmcp's `#[tool(...)]`
    /// methods, whose dispatch table is synthesised by a procedural macro
    /// at compile time. Unless seeded explicitly, such methods look dead to
    /// BFS reachability although they are the user-facing API.
    ///
    /// Introduced in 4.1.1 (Wave 1 Front A, node A4): a live measurement
    /// against ripvec itself reported `dead_fraction = 0.986` because every
    /// `#[tool]`-annotated worker was unreachable from the call graph — see
    /// the 4.1.1 entry in `DEV_JOURNAL.md`.
    FrameworkDispatched,
}
82
83/// A single entry-point detection in one source file.
84///
85/// Per-detection, not per-definition — the same `pub fn` can produce
86/// multiple `EntryPoint` instances (one for each matching predicate).
87/// Downstream consumers should treat each detection as an independent
88/// reachability seed.
89#[derive(Debug, Clone, PartialEq, Eq)]
90pub struct EntryPoint {
91 /// The symbol name of the entry point. For Rust this is the function
92 /// item identifier; for Python it is the function or module-level
93 /// expression name; for Go it is the function declaration identifier.
94 pub name: String,
95
96 /// Why this item was treated as an entry point.
97 pub kind: EntryPointKind,
98
99 /// The source file the entry point was detected in.
100 pub file_path: PathBuf,
101
102 /// 1-based line number of the entry point declaration. Matches the
103 /// `start_line` field of [`crate::repo_map::Definition`].
104 pub line: u32,
105}
106
107/// Per-language entry-point detector.
108///
109/// Designed for consumption by `RepoGraph::compute_dead_code` in
110/// 4.1.0-X2. Until X2 lands, the only consumers are the integration tests
111/// under `crates/ripvec-core/tests/entry_points.rs` — see the
112/// module-level docstring for the Type B (Wired-Stub) self-audit note.
113///
114/// Implementations parse the source once per call. The parsing cost is
115/// trivial (tree-sitter is O(n) and the source is already in memory at
116/// detection time), and stateless parsers compose more cleanly than a
117/// shared parser cache across the three (and eventually eleven) language
118/// detectors. X2's `RepoGraph::compute_dead_code` already iterates
119/// per-file, so the per-file parse adds no additional walk cost.
120pub trait EntryPointDetector {
121 /// Return every entry point declared in this source file.
122 ///
123 /// `source` is the full UTF-8 contents of `file_path`. The path is
124 /// passed alongside `source` so detectors that consider filename
125 /// patterns (e.g. Python's `test_*.py` and `*_test.py`,
126 /// Rust's `build.rs`) can use both signals.
127 ///
128 /// If parsing fails, returns an empty vector — entry-point detection
129 /// is best-effort and should never abort the dead-code walk.
130 fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint>;
131}
132
133// ---------------------------------------------------------------------------
134// Rust detector
135// ---------------------------------------------------------------------------
136
/// Entry-point detector for Rust sources.
///
/// Recognised shapes (per `docs/FIND_DEAD_CODE_DESIGN.md` Section 2 plus
/// the 4.1.1 Front A widening):
/// - `pub fn main()` and bare `fn main()` → Main
/// - `pub fn` items in `lib.rs` / `mod.rs` → LibraryExport
/// - `pub use` re-exports in `lib.rs` / `mod.rs` → LibraryExport (A1)
/// - top-level `fn main` in `examples/*.rs` / `benches/*.rs` → Main (A2)
/// - Criterion `pub fn benches()` in `benches/*.rs` → Main (A2)
/// - items annotated with `#[test]` or `#[bench]` → Test
/// - items annotated with `#[no_mangle]` or marked `extern "C"` → Ffi
/// - items annotated with `#[proc_macro]`, `#[proc_macro_derive]`, or
///   `#[proc_macro_attribute]` → ProcMacro
/// - methods annotated with `#[tool(...)]`, and every method inside a
///   `#[tool_router] impl ...` block → FrameworkDispatched (A3)
/// - a `build.rs` file as a whole → one BuildScript entry (the build
///   script's `main` is the cargo-known entry)
///
/// The role a file plays is decided by [`rust_file_role`]: lib/mod files
/// gain LibraryExport surfacing, while examples/benches files surface their
/// top-level `fn main` and `pub fn` items as cargo-known entries.
#[derive(Clone, Copy, Debug, Default)]
pub struct RustEntryDetector;
160
/// Role a Rust source file plays inside the cargo workspace.
///
/// The detector consults this to recognise entry points that are not
/// visible from the source text alone: files under `examples/` and
/// `benches/` are cargo-known entries even when their `fn main` carries no
/// annotation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RustFileRole {
    /// `src/lib.rs` or any `mod.rs` — the crate's published interface.
    LibOrMod,
    /// A file below `examples/` (cargo example binary).
    Example,
    /// A file below `benches/` (cargo benchmark target).
    Bench,
    /// Every other file.
    Other,
}

/// Work out the cargo role of a Rust source path.
///
/// A file named `lib.rs` or `mod.rs` is always `LibOrMod`; that check runs
/// first and therefore wins over any directory name. Otherwise the first
/// path component equal to `examples` or `benches` decides the role, and a
/// path with neither is `Other`.
///
/// Cargo itself honours only the top-level `examples/` and `benches/`
/// directories of a crate, but matching the component anywhere in the path
/// is sufficient for entry-point purposes (a synthetic test file may live
/// anywhere on disk).
fn rust_file_role(file_path: &Path) -> RustFileRole {
    let file_name = file_path.file_name().and_then(|name| name.to_str());
    if matches!(file_name, Some("lib.rs" | "mod.rs")) {
        return RustFileRole::LibOrMod;
    }
    file_path
        .components()
        .find_map(|part| match part.as_os_str().to_str() {
            Some("examples") => Some(RustFileRole::Example),
            Some("benches") => Some(RustFileRole::Bench),
            _ => None,
        })
        .unwrap_or(RustFileRole::Other)
}
201
202impl EntryPointDetector for RustEntryDetector {
203 fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
204 let mut entries = Vec::new();
205 let Some(tree) = parse_with(source, &tree_sitter_rust::LANGUAGE.into()) else {
206 return entries;
207 };
208 let root = tree.root_node();
209 let bytes = source.as_bytes();
210
211 // Treat the entire build.rs file as a single BuildScript entry.
212 // The crate's main may be named anything inside build.rs (cargo
213 // calls the file's main), so we emit one entry at line 1.
214 if file_path.file_name().and_then(|s| s.to_str()) == Some("build.rs") {
215 entries.push(EntryPoint {
216 name: "build.rs".to_string(),
217 kind: EntryPointKind::BuildScript,
218 file_path: file_path.to_path_buf(),
219 line: 1,
220 });
221 }
222
223 let role = rust_file_role(file_path);
224
225 // A1: `pub use` re-exports in lib.rs / mod.rs surface the named
226 // items as LibraryExport entry points. Walk the top-level
227 // children — re-exports are only meaningful at module scope.
228 if role == RustFileRole::LibOrMod {
229 let mut cursor = root.walk();
230 for child in root.children(&mut cursor) {
231 if child.kind() == "use_declaration" && rust_use_is_pub(&child, bytes) {
232 collect_rust_pub_use_entries(&child, bytes, file_path, &mut entries);
233 }
234 }
235 }
236
237 // A3: `#[tool_router] impl ...` blocks make every method inside
238 // them framework-dispatched. Walk the AST top-down looking for
239 // impl_item nodes with a preceding `#[tool_router]` attribute and
240 // emit FrameworkDispatched entries for each contained method.
241 visit_rust_tool_router_impls(&root, bytes, file_path, &mut entries);
242
243 // Walk every function_item declaration recursively. For each item:
244 // - inspect its preceding attribute_item siblings for #[test],
245 // #[bench], #[no_mangle], #[proc_macro*], #[tool(...)]
246 // - inspect the function_item's own modifiers for `extern "C"`
247 // - inspect the name for `main`
248 // - if file is lib.rs/mod.rs and the item is `pub`, emit
249 // LibraryExport
250 // - if file is examples/*.rs and the item is `fn main`, emit Main
251 // - if file is benches/*.rs and the item is `fn main` or
252 // `pub fn benches`, emit Main
253 visit_rust_node(&root, bytes, file_path, role, &mut entries);
254 entries
255 }
256}
257
258fn visit_rust_node(
259 node: &Node<'_>,
260 bytes: &[u8],
261 file_path: &Path,
262 role: RustFileRole,
263 out: &mut Vec<EntryPoint>,
264) {
265 if node.kind() == "function_item" {
266 rust_classify_function(node, bytes, file_path, role, out);
267 }
268 let mut cursor = node.walk();
269 for child in node.children(&mut cursor) {
270 visit_rust_node(&child, bytes, file_path, role, out);
271 }
272}
273
274fn rust_classify_function(
275 node: &Node<'_>,
276 bytes: &[u8],
277 file_path: &Path,
278 role: RustFileRole,
279 out: &mut Vec<EntryPoint>,
280) {
281 // Find the function name. function_item has a `name` field whose
282 // value is an identifier child.
283 let name_node = node.child_by_field_name("name");
284 let Some(name_node) = name_node else { return };
285 let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()]) else {
286 return;
287 };
288 let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
289
290 // Gather attributes that immediately precede this function. In
291 // tree-sitter-rust the attributes are SIBLING attribute_item nodes,
292 // not children of the function_item, so we walk previous siblings.
293 let attrs = collect_preceding_rust_attrs(node, bytes);
294
295 // A single function item may match multiple predicates (e.g. a
296 // `#[no_mangle] pub extern "C" fn main` in `lib.rs` is both Ffi
297 // and Main and LibraryExport). Emit one EntryPoint per matching
298 // predicate; the BFS in X2 treats each detection as a distinct
299 // reachability seed.
300
301 // #[proc_macro], #[proc_macro_derive], #[proc_macro_attribute].
302 if attrs.iter().any(|a| {
303 a.starts_with("proc_macro_derive")
304 || a.starts_with("proc_macro_attribute")
305 || a == "proc_macro"
306 || a.starts_with("proc_macro(")
307 }) {
308 out.push(EntryPoint {
309 name: name.to_string(),
310 kind: EntryPointKind::ProcMacro,
311 file_path: file_path.to_path_buf(),
312 line,
313 });
314 }
315
316 // #[test] / #[bench].
317 if attrs.iter().any(|a| a == "test" || a == "bench") {
318 out.push(EntryPoint {
319 name: name.to_string(),
320 kind: EntryPointKind::Test,
321 file_path: file_path.to_path_buf(),
322 line,
323 });
324 }
325
326 // FFI: #[no_mangle] OR `extern "C"` in the function declaration.
327 let function_text =
328 std::str::from_utf8(&bytes[node.start_byte()..node.end_byte()]).unwrap_or("");
329 let has_extern_c =
330 rust_function_has_extern_c(node, bytes) || function_text.contains("extern \"C\"");
331 if attrs.iter().any(|a| a == "no_mangle") || has_extern_c {
332 out.push(EntryPoint {
333 name: name.to_string(),
334 kind: EntryPointKind::Ffi,
335 file_path: file_path.to_path_buf(),
336 line,
337 });
338 }
339
340 // A3: `#[tool(...)]` framework-dispatched method.
341 if attrs.iter().any(|a| a == "tool" || a.starts_with("tool(")) {
342 out.push(EntryPoint {
343 name: name.to_string(),
344 kind: EntryPointKind::FrameworkDispatched,
345 file_path: file_path.to_path_buf(),
346 line,
347 });
348 }
349
350 // Main: `fn main` (with or without `pub`).
351 if name == "main" {
352 out.push(EntryPoint {
353 name: name.to_string(),
354 kind: EntryPointKind::Main,
355 file_path: file_path.to_path_buf(),
356 line,
357 });
358 }
359
360 // LibraryExport: `pub fn` in lib.rs / mod.rs.
361 if role == RustFileRole::LibOrMod && rust_function_is_pub(node, bytes) {
362 out.push(EntryPoint {
363 name: name.to_string(),
364 kind: EntryPointKind::LibraryExport,
365 file_path: file_path.to_path_buf(),
366 line,
367 });
368 }
369
370 // A2: Criterion benches expose `pub fn benches()` invoked via the
371 // `criterion_main!` macro. Treat any top-level `pub fn` in a
372 // `benches/*.rs` file as a Main entry — cargo's bench target invokes
373 // it as a binary entry.
374 if role == RustFileRole::Bench
375 && rust_function_is_pub(node, bytes)
376 && is_top_level_in_source(node)
377 {
378 out.push(EntryPoint {
379 name: name.to_string(),
380 kind: EntryPointKind::Main,
381 file_path: file_path.to_path_buf(),
382 line,
383 });
384 }
385}
386
387/// Return true if this node sits at the source-file root (its parent
388/// chain reaches `source_file` with no intervening item-bearing scope).
389///
390/// Used to scope examples/benches `pub fn` recognition to module-level
391/// declarations only.
392fn is_top_level_in_source(node: &Node<'_>) -> bool {
393 node.parent().is_some_and(|p| p.kind() == "source_file")
394}
395
396/// Collect the text of every `#[...]` attribute node that immediately
397/// precedes this function_item in source order. The returned strings are
398/// the attribute path/identifier (e.g. `"test"`, `"no_mangle"`,
399/// `"proc_macro_derive(Foo)"`), with the leading `#[` and trailing `]`
400/// stripped, and any leading `outer_attribute_item` `#[` punctuation
401/// removed.
402fn collect_preceding_rust_attrs(node: &Node<'_>, bytes: &[u8]) -> Vec<String> {
403 let mut attrs = Vec::new();
404 let mut prev = node.prev_sibling();
405 while let Some(p) = prev {
406 if p.kind() == "attribute_item" || p.kind() == "inner_attribute_item" {
407 // The attribute_item child structure is `# [ attribute ]`;
408 // pull the `attribute` child and use its text.
409 let mut cursor = p.walk();
410 let mut attr_text: Option<String> = None;
411 for child in p.children(&mut cursor) {
412 if child.kind() == "attribute"
413 && let Ok(text) =
414 std::str::from_utf8(&bytes[child.start_byte()..child.end_byte()])
415 {
416 attr_text = Some(text.to_string());
417 }
418 }
419 if let Some(t) = attr_text {
420 attrs.push(t);
421 }
422 prev = p.prev_sibling();
423 } else if p.kind().starts_with("line_comment") || p.kind().starts_with("block_comment") {
424 prev = p.prev_sibling();
425 } else {
426 break;
427 }
428 }
429 attrs
430}
431
432/// Return true if the function_item node has a `pub` visibility modifier.
433fn rust_function_is_pub(node: &Node<'_>, bytes: &[u8]) -> bool {
434 let mut cursor = node.walk();
435 for child in node.children(&mut cursor) {
436 if child.kind() == "visibility_modifier"
437 && let Ok(text) = std::str::from_utf8(&bytes[child.start_byte()..child.end_byte()])
438 {
439 return text.starts_with("pub");
440 }
441 }
442 false
443}
444
445/// Return true if a `use_declaration` node carries a `pub` visibility
446/// modifier (i.e. it is a `pub use ...;` re-export).
447fn rust_use_is_pub(node: &Node<'_>, bytes: &[u8]) -> bool {
448 let mut cursor = node.walk();
449 for child in node.children(&mut cursor) {
450 if child.kind() == "visibility_modifier"
451 && let Ok(text) = std::str::from_utf8(&bytes[child.start_byte()..child.end_byte()])
452 {
453 return text.starts_with("pub");
454 }
455 }
456 false
457}
458
459/// Collect [`EntryPoint`]s for each name re-exported by a
460/// `pub use ...;` declaration.
461///
462/// Handles the four common shapes from `docs/PLAN.md` cluster A:
463/// - `pub use ::path::to::Item;` — emit `Item` (the trailing segment).
464/// - `pub use ::path::to::*;` — emit a glob entry whose `name` is the
465/// full path (the consumer at graph-walk time fans out to every
466/// matching definition).
467/// - `pub use ::path::to::{Foo, Bar};` — emit `Foo` and `Bar`.
468/// - `pub use ::path::to::Item as Alias;` — emit `Alias` (the alias is
469/// the exported surface name).
470fn collect_rust_pub_use_entries(
471 use_decl: &Node<'_>,
472 bytes: &[u8],
473 file_path: &Path,
474 out: &mut Vec<EntryPoint>,
475) {
476 let line = u32::try_from(use_decl.start_position().row + 1).unwrap_or(u32::MAX);
477 // Find the `argument` field of the use_declaration (tree-sitter-rust
478 // names the use tree this way). Fall back to walking children if the
479 // field is missing on this grammar version.
480 let argument = use_decl.child_by_field_name("argument").or_else(|| {
481 let mut cursor = use_decl.walk();
482 let mut found: Option<Node<'_>> = None;
483 for child in use_decl.children(&mut cursor) {
484 match child.kind() {
485 "scoped_identifier" | "scoped_use_list" | "use_list" | "use_as_clause"
486 | "use_wildcard" | "identifier" => {
487 found = Some(child);
488 break;
489 }
490 _ => {}
491 }
492 }
493 found
494 });
495 let Some(argument) = argument else { return };
496 rust_collect_use_tree(&argument, bytes, file_path, line, out);
497}
498
499/// Recursively walk a `pub use` tree, emitting one [`EntryPoint`] per
500/// leaf name (or one glob entry per `::*`).
501fn rust_collect_use_tree(
502 node: &Node<'_>,
503 bytes: &[u8],
504 file_path: &Path,
505 line: u32,
506 out: &mut Vec<EntryPoint>,
507) {
508 match node.kind() {
509 // Wildcard: `path::*` — emit the whole path as the entry name.
510 "use_wildcard" => {
511 if let Ok(text) = std::str::from_utf8(&bytes[node.start_byte()..node.end_byte()]) {
512 let trimmed = text.trim();
513 let normalised = trimmed.replace(char::is_whitespace, "");
514 out.push(EntryPoint {
515 name: normalised,
516 kind: EntryPointKind::LibraryExport,
517 file_path: file_path.to_path_buf(),
518 line,
519 });
520 }
521 }
522 // Braced group: `path::{Foo, Bar as Baz, sub::Qux}` — recurse
523 // into each element.
524 "use_list" => {
525 let mut cursor = node.walk();
526 for child in node.children(&mut cursor) {
527 if matches!(child.kind(), "," | "{" | "}") {
528 continue;
529 }
530 rust_collect_use_tree(&child, bytes, file_path, line, out);
531 }
532 }
533 // `path::{...}` is a `scoped_use_list`; walk into the list child.
534 "scoped_use_list" => {
535 let list = node.child_by_field_name("list").or_else(|| {
536 let mut cursor = node.walk();
537 node.children(&mut cursor).find(|c| c.kind() == "use_list")
538 });
539 if let Some(list) = list {
540 rust_collect_use_tree(&list, bytes, file_path, line, out);
541 }
542 }
543 // `path::Item as Alias` — the alias is the exported name.
544 "use_as_clause" => {
545 let alias = node.child_by_field_name("alias");
546 if let Some(alias) = alias
547 && let Ok(text) = std::str::from_utf8(&bytes[alias.start_byte()..alias.end_byte()])
548 {
549 out.push(EntryPoint {
550 name: text.to_string(),
551 kind: EntryPointKind::LibraryExport,
552 file_path: file_path.to_path_buf(),
553 line,
554 });
555 }
556 }
557 // `crate::a::b::c::Item` — the trailing identifier is the export.
558 "scoped_identifier" => {
559 let name = node.child_by_field_name("name");
560 if let Some(name) = name
561 && let Ok(text) = std::str::from_utf8(&bytes[name.start_byte()..name.end_byte()])
562 {
563 out.push(EntryPoint {
564 name: text.to_string(),
565 kind: EntryPointKind::LibraryExport,
566 file_path: file_path.to_path_buf(),
567 line,
568 });
569 }
570 }
571 // Bare `Item` (e.g. `pub use Item;`).
572 "identifier" => {
573 if let Ok(text) = std::str::from_utf8(&bytes[node.start_byte()..node.end_byte()]) {
574 out.push(EntryPoint {
575 name: text.to_string(),
576 kind: EntryPointKind::LibraryExport,
577 file_path: file_path.to_path_buf(),
578 line,
579 });
580 }
581 }
582 // Anything else (whitespace, punctuation, comments): ignore.
583 _ => {}
584 }
585}
586
587/// Walk the AST top-down looking for `#[tool_router] impl ...` blocks.
588/// For each such impl block, emit a [`EntryPointKind::FrameworkDispatched`]
589/// entry for every contained method.
590fn visit_rust_tool_router_impls(
591 node: &Node<'_>,
592 bytes: &[u8],
593 file_path: &Path,
594 out: &mut Vec<EntryPoint>,
595) {
596 if node.kind() == "impl_item" {
597 let attrs = collect_preceding_rust_attrs(node, bytes);
598 if attrs
599 .iter()
600 .any(|a| a == "tool_router" || a.starts_with("tool_router("))
601 {
602 // Walk the impl's body, emitting an entry per function_item.
603 if let Some(body) = node.child_by_field_name("body") {
604 let mut cursor = body.walk();
605 for child in body.children(&mut cursor) {
606 if child.kind() != "function_item" {
607 continue;
608 }
609 let Some(name_node) = child.child_by_field_name("name") else {
610 continue;
611 };
612 let Ok(name) =
613 std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()])
614 else {
615 continue;
616 };
617 let line = u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
618 out.push(EntryPoint {
619 name: name.to_string(),
620 kind: EntryPointKind::FrameworkDispatched,
621 file_path: file_path.to_path_buf(),
622 line,
623 });
624 }
625 }
626 }
627 }
628 let mut cursor = node.walk();
629 for child in node.children(&mut cursor) {
630 visit_rust_tool_router_impls(&child, bytes, file_path, out);
631 }
632}
633
634/// Return true if the function_item has an `extern "C"` ABI declaration
635/// as a function-modifier child (e.g. `pub extern "C" fn bar()`).
636fn rust_function_has_extern_c(node: &Node<'_>, bytes: &[u8]) -> bool {
637 let mut cursor = node.walk();
638 for child in node.children(&mut cursor) {
639 // tree-sitter-rust uses `function_modifiers` containing
640 // `extern_modifier`; the latter's child is a `string_literal`
641 // with the ABI name.
642 if child.kind() != "function_modifiers" {
643 continue;
644 }
645 let mut inner = child.walk();
646 for grandchild in child.children(&mut inner) {
647 if grandchild.kind() == "extern_modifier"
648 && let Ok(text) =
649 std::str::from_utf8(&bytes[grandchild.start_byte()..grandchild.end_byte()])
650 && text.contains("\"C\"")
651 {
652 return true;
653 }
654 }
655 }
656 false
657}
658
659// ---------------------------------------------------------------------------
660// Python detector
661// ---------------------------------------------------------------------------
662
/// Entry-point detector for Python sources.
///
/// Recognised shapes (per `docs/FIND_DEAD_CODE_DESIGN.md` Section 2 plus
/// the Cycle 9 B-0007 widening):
/// - module-level `if __name__ == "__main__":` blocks → Main
/// - functions called directly from inside that block, e.g. `main()` or
///   `cli()` → Main, so the actual callable becomes a BFS root
/// - top-level functions decorated with `@click.command()`,
///   `@typer.command()`, or any `@X.command()` pattern → Main
/// - top-level functions named in `__all__` → LibraryExport
/// - functions whose name starts with `test_`, in files matching
///   `test_*.py` / `*_test.py` or located under a `tests/` directory → Test
#[derive(Clone, Copy, Debug, Default)]
pub struct PythonEntryDetector;
677
678impl EntryPointDetector for PythonEntryDetector {
679 #[expect(
680 clippy::too_many_lines,
681 reason = "two-pass detection: first-pass collects top-level fn names and CLI decorators, second-pass emits entries; helper functions keep individual pieces readable"
682 )]
683 fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
684 let mut entries = Vec::new();
685 let Some(tree) = parse_with(source, &tree_sitter_python::LANGUAGE.into()) else {
686 return entries;
687 };
688 let root = tree.root_node();
689 let bytes = source.as_bytes();
690
691 let is_test_file = python_is_test_file(file_path);
692
693 // First pass: collect top-level function names and CLI decorators.
694 let mut toplevel_fns: Vec<(String, u32)> = Vec::new();
695 let mut click_decorated: Vec<(String, u32)> = Vec::new();
696 {
697 let mut cursor = root.walk();
698 for child in root.children(&mut cursor) {
699 match child.kind() {
700 "function_definition" => {
701 if let Some(name_node) = child.child_by_field_name("name")
702 && let Ok(name) = std::str::from_utf8(
703 &bytes[name_node.start_byte()..name_node.end_byte()],
704 )
705 {
706 let line =
707 u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
708 toplevel_fns.push((name.to_string(), line));
709 }
710 }
711 "decorated_definition" => {
712 if let Some(fn_node) = child.child_by_field_name("definition")
713 && fn_node.kind() == "function_definition"
714 && let Some(name_node) = fn_node.child_by_field_name("name")
715 && let Ok(name) = std::str::from_utf8(
716 &bytes[name_node.start_byte()..name_node.end_byte()],
717 )
718 {
719 let line =
720 u32::try_from(fn_node.start_position().row + 1).unwrap_or(u32::MAX);
721 toplevel_fns.push((name.to_string(), line));
722 if python_has_cli_command_decorator(&child, bytes) {
723 click_decorated.push((name.to_string(), line));
724 }
725 }
726 }
727 _ => {}
728 }
729 }
730 }
731
732 // Second pass: emit entry points.
733 let mut cursor = root.walk();
734 for child in root.children(&mut cursor) {
735 match child.kind() {
736 "if_statement" if python_is_dunder_main_block(&child, bytes) => {
737 let line = u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
738 entries.push(EntryPoint {
739 name: "__main__".to_string(),
740 kind: EntryPointKind::Main,
741 file_path: file_path.to_path_buf(),
742 line,
743 });
744 // Also emit functions called directly in the block body.
745 // tree-sitter-python uses "consequence" for the block,
746 // not "body".
747 if let Some(body) = child.child_by_field_name("consequence") {
748 for called in python_direct_calls_in_block(&body, bytes) {
749 let fn_line = toplevel_fns
750 .iter()
751 .find(|(n, _)| n == &called)
752 .map(|(_, l)| *l)
753 .unwrap_or(line);
754 entries.push(EntryPoint {
755 name: called,
756 kind: EntryPointKind::Main,
757 file_path: file_path.to_path_buf(),
758 line: fn_line,
759 });
760 }
761 }
762 }
763 "expression_statement" => {
764 // `__all__ = [...]` is an expression_statement
765 // containing an assignment.
766 if let Some(names) = python_extract_dunder_all(&child, bytes) {
767 let line =
768 u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
769 for n in names {
770 entries.push(EntryPoint {
771 name: n,
772 kind: EntryPointKind::LibraryExport,
773 file_path: file_path.to_path_buf(),
774 line,
775 });
776 }
777 }
778 }
779 "function_definition" | "decorated_definition" => {
780 let fn_node = if child.kind() == "decorated_definition" {
781 child.child_by_field_name("definition")
782 } else {
783 Some(child)
784 };
785 if let Some(fn_node) = fn_node
786 && fn_node.kind() == "function_definition"
787 && let Some(name_node) = fn_node.child_by_field_name("name")
788 && let Ok(name) = std::str::from_utf8(
789 &bytes[name_node.start_byte()..name_node.end_byte()],
790 )
791 && is_test_file
792 && name.starts_with("test_")
793 {
794 let line =
795 u32::try_from(fn_node.start_position().row + 1).unwrap_or(u32::MAX);
796 entries.push(EntryPoint {
797 name: name.to_string(),
798 kind: EntryPointKind::Test,
799 file_path: file_path.to_path_buf(),
800 line,
801 });
802 }
803 }
804 _ => {}
805 }
806 }
807
808 // Emit @click.command() / @typer.command() decorated functions.
809 for (name, line) in click_decorated {
810 entries.push(EntryPoint {
811 name,
812 kind: EntryPointKind::Main,
813 file_path: file_path.to_path_buf(),
814 line,
815 });
816 }
817
818 entries
819 }
820}
821
/// Decide whether `file_path` names a Python test file.
///
/// The file must have a UTF-8 name with a `py` extension (compared
/// case-insensitively). It is a test file when its stem starts with
/// `test_` or ends with `_test`, or when any component of the path is
/// exactly `tests`.
fn python_is_test_file(file_path: &Path) -> bool {
    let Some(file_name) = file_path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };
    let name_only = Path::new(file_name);
    match name_only.extension() {
        Some(ext) if ext.eq_ignore_ascii_case("py") => {}
        _ => return false,
    }
    let named_like_test = name_only
        .file_stem()
        .and_then(|stem| stem.to_str())
        .is_some_and(|stem| stem.starts_with("test_") || stem.ends_with("_test"));
    // Otherwise a `tests` directory anywhere in the path qualifies.
    named_like_test
        || file_path
            .components()
            .any(|part| part.as_os_str() == "tests")
}
844
845fn python_is_dunder_main_block(node: &Node<'_>, bytes: &[u8]) -> bool {
846 // if condition: comparison `__name__ == "__main__"`.
847 let cond = node.child_by_field_name("condition");
848 let Some(cond) = cond else { return false };
849 let Ok(text) = std::str::from_utf8(&bytes[cond.start_byte()..cond.end_byte()]) else {
850 return false;
851 };
852 // Tolerate single or double quotes around `__main__`.
853 let normalized = text.replace(' ', "");
854 normalized.contains("__name__==\"__main__\"")
855 || normalized.contains("__name__=='__main__'")
856 || normalized.contains("\"__main__\"==__name__")
857 || normalized.contains("'__main__'==__name__")
858}
859
860/// Extract the string literals from a top-level `__all__ = [...]`
861/// assignment. Returns `None` if the statement is not such an assignment.
862fn python_extract_dunder_all(node: &Node<'_>, bytes: &[u8]) -> Option<Vec<String>> {
863 // expression_statement -> assignment (left, right)
864 let mut cursor = node.walk();
865 for child in node.children(&mut cursor) {
866 if child.kind() == "assignment" {
867 let left = child.child_by_field_name("left")?;
868 let right = child.child_by_field_name("right")?;
869 let left_text = std::str::from_utf8(&bytes[left.start_byte()..left.end_byte()]).ok()?;
870 if left_text.trim() != "__all__" {
871 return None;
872 }
873 // right is typically a `list` or `tuple` node containing
874 // `string` children.
875 let mut names = Vec::new();
876 let mut inner = right.walk();
877 for grandchild in right.children(&mut inner) {
878 if grandchild.kind() != "string" {
879 continue;
880 }
881 // Walk the string node to find string_content child.
882 let mut sc = grandchild.walk();
883 let mut content_text: Option<String> = None;
884 for sg in grandchild.children(&mut sc) {
885 if sg.kind() == "string_content"
886 && let Ok(t) = std::str::from_utf8(&bytes[sg.start_byte()..sg.end_byte()])
887 {
888 content_text = Some(t.to_string());
889 }
890 }
891 if let Some(t) = content_text {
892 names.push(t);
893 } else if let Ok(raw) =
894 std::str::from_utf8(&bytes[grandchild.start_byte()..grandchild.end_byte()])
895 {
896 // Fallback: strip the outer quotes from the raw
897 // string text.
898 let trimmed = raw.trim_matches(|c| c == '"' || c == '\'');
899 names.push(trimmed.to_string());
900 }
901 }
902 return Some(names);
903 }
904 }
905 None
906}
907
908/// Return the names of functions called as bare `name()` (no attribute
909/// access) in the immediate statement children of a Python block node.
910///
911/// Used to extract the callable(s) invoked from an
912/// `if __name__ == "__main__":` block body — typically a single call like
913/// `main()` or `cli()`. Restricted to top-level
914/// `expression_statement → call → identifier` to avoid over-seeding.
915fn python_direct_calls_in_block(block: &Node<'_>, bytes: &[u8]) -> Vec<String> {
916 let mut names = Vec::new();
917 let mut cursor = block.walk();
918 for stmt in block.children(&mut cursor) {
919 if stmt.kind() != "expression_statement" {
920 continue;
921 }
922 let mut sc = stmt.walk();
923 for expr in stmt.children(&mut sc) {
924 if expr.kind() != "call" {
925 continue;
926 }
927 if let Some(func_node) = expr.child_by_field_name("function")
928 && func_node.kind() == "identifier"
929 && let Ok(name) =
930 std::str::from_utf8(&bytes[func_node.start_byte()..func_node.end_byte()])
931 && !name.is_empty()
932 {
933 names.push(name.to_string());
934 }
935 }
936 }
937 names
938}
939
940/// Return true if a `decorated_definition` node carries a CLI command
941/// decorator matching `@X.command()` or `@X.command` (where X is any
942/// module name such as `click`, `typer`, `app`, `cli`, etc.).
943///
944/// Only `.command` attribute access is matched — this covers all major
945/// Python CLI frameworks conservatively.
946fn python_has_cli_command_decorator(decorated_def: &Node<'_>, bytes: &[u8]) -> bool {
947 let mut cursor = decorated_def.walk();
948 for child in decorated_def.children(&mut cursor) {
949 if child.kind() != "decorator" {
950 continue;
951 }
952 let mut dc = child.walk();
953 for inner in child.children(&mut dc) {
954 match inner.kind() {
955 "call" => {
956 if let Some(func) = inner.child_by_field_name("function")
957 && func.kind() == "attribute"
958 && let Some(prop) = func.child_by_field_name("attribute")
959 && let Ok(prop_text) =
960 std::str::from_utf8(&bytes[prop.start_byte()..prop.end_byte()])
961 && prop_text == "command"
962 {
963 return true;
964 }
965 }
966 "attribute" => {
967 if let Some(prop) = inner.child_by_field_name("attribute")
968 && let Ok(prop_text) =
969 std::str::from_utf8(&bytes[prop.start_byte()..prop.end_byte()])
970 && prop_text == "command"
971 {
972 return true;
973 }
974 }
975 _ => {}
976 }
977 }
978 }
979 false
980}
981
982// ---------------------------------------------------------------------------
983// Go detector
984// ---------------------------------------------------------------------------
985
986/// Go entry-point detector.
987///
988/// Detects (per `docs/FIND_DEAD_CODE_DESIGN.md` Section 2):
989/// - `func main()` in `package main` (Main)
990/// - `func init()` (Init) — runs automatically at package load
991/// - Functions starting with `Test`, `Benchmark`, `Example`, `Fuzz` (Test)
992/// - Exported names (starting with uppercase) in library packages
993/// (LibraryExport)
994#[derive(Debug, Default, Clone, Copy)]
995pub struct GoEntryDetector;
996
997impl EntryPointDetector for GoEntryDetector {
998 fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
999 let mut entries = Vec::new();
1000 let Some(tree) = parse_with(source, &tree_sitter_go::LANGUAGE.into()) else {
1001 return entries;
1002 };
1003 let root = tree.root_node();
1004 let bytes = source.as_bytes();
1005
1006 // Determine the package name. `package main` enables `main` as
1007 // the binary entry; non-main packages are libraries whose
1008 // exported names are library entries.
1009 let package_name = go_package_name(&root, bytes).unwrap_or_default();
1010 let is_main_package = package_name == "main";
1011
1012 // Walk top-level function_declaration / method_declaration nodes.
1013 let mut cursor = root.walk();
1014 for child in root.children(&mut cursor) {
1015 match child.kind() {
1016 "function_declaration" => {
1017 if let Some(name_node) = child.child_by_field_name("name")
1018 && let Ok(name) = std::str::from_utf8(
1019 &bytes[name_node.start_byte()..name_node.end_byte()],
1020 )
1021 {
1022 let line =
1023 u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
1024 go_classify(name, line, is_main_package, file_path, &mut entries);
1025 }
1026 }
1027 "method_declaration" => {
1028 // Methods participate in LibraryExport only — main / init
1029 // / Test* are exclusively free functions.
1030 if let Some(name_node) = child.child_by_field_name("name")
1031 && let Ok(name) = std::str::from_utf8(
1032 &bytes[name_node.start_byte()..name_node.end_byte()],
1033 )
1034 && !is_main_package
1035 && go_is_exported(name)
1036 {
1037 let line =
1038 u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
1039 entries.push(EntryPoint {
1040 name: name.to_string(),
1041 kind: EntryPointKind::LibraryExport,
1042 file_path: file_path.to_path_buf(),
1043 line,
1044 });
1045 }
1046 }
1047 _ => {}
1048 }
1049 }
1050
1051 entries
1052 }
1053}
1054
1055fn go_package_name(root: &Node<'_>, bytes: &[u8]) -> Option<String> {
1056 let mut cursor = root.walk();
1057 for child in root.children(&mut cursor) {
1058 if child.kind() != "package_clause" {
1059 continue;
1060 }
1061 let mut inner = child.walk();
1062 for grandchild in child.children(&mut inner) {
1063 if grandchild.kind() == "package_identifier"
1064 && let Ok(text) =
1065 std::str::from_utf8(&bytes[grandchild.start_byte()..grandchild.end_byte()])
1066 {
1067 return Some(text.to_string());
1068 }
1069 }
1070 }
1071 None
1072}
1073
1074fn go_classify(
1075 name: &str,
1076 line: u32,
1077 is_main_package: bool,
1078 file_path: &Path,
1079 out: &mut Vec<EntryPoint>,
1080) {
1081 if name == "main" && is_main_package {
1082 out.push(EntryPoint {
1083 name: name.to_string(),
1084 kind: EntryPointKind::Main,
1085 file_path: file_path.to_path_buf(),
1086 line,
1087 });
1088 return;
1089 }
1090 if name == "init" {
1091 out.push(EntryPoint {
1092 name: name.to_string(),
1093 kind: EntryPointKind::Init,
1094 file_path: file_path.to_path_buf(),
1095 line,
1096 });
1097 return;
1098 }
1099 if name.starts_with("Test")
1100 || name.starts_with("Benchmark")
1101 || name.starts_with("Example")
1102 || name.starts_with("Fuzz")
1103 {
1104 out.push(EntryPoint {
1105 name: name.to_string(),
1106 kind: EntryPointKind::Test,
1107 file_path: file_path.to_path_buf(),
1108 line,
1109 });
1110 return;
1111 }
1112 if !is_main_package && go_is_exported(name) {
1113 out.push(EntryPoint {
1114 name: name.to_string(),
1115 kind: EntryPointKind::LibraryExport,
1116 file_path: file_path.to_path_buf(),
1117 line,
1118 });
1119 }
1120}
1121
/// Return true if `name` starts with an uppercase letter, which is Go's
/// syntactic rule for an exported (package-public) identifier.
///
/// The check is Unicode-aware rather than ASCII-only: Go exports any
/// identifier whose first character is a Unicode uppercase letter, so
/// names such as `Écrire` are exported too. The empty string and names
/// starting with `_`, a digit, or a lowercase letter are not exported.
fn go_is_exported(name: &str) -> bool {
    name.chars().next().is_some_and(char::is_uppercase)
}
1127
1128// ---------------------------------------------------------------------------
1129// C detector
1130// ---------------------------------------------------------------------------
1131
1132/// C entry-point detector (I#73, Cycle 7 / 4.1.4).
1133///
1134/// Detects the following patterns as entry points:
1135/// - Any `function_definition` whose declarator resolves to the identifier
1136/// `main` — regardless of return type or parameter shape. This covers
1137/// `int main()`, `int main(int argc, char **argv)`, and the rare
1138/// pointer-return variant `int *main(void)`.
1139/// - `__attribute__((constructor))` annotated functions → [`EntryPointKind::Init`].
1140/// These run before `main` via ELF `.init_array`; they are BFS roots.
1141/// - `// export NAME` line comments (cgo-style FFI) → [`EntryPointKind::Ffi`].
1142/// Cgo emits these above C wrapper stubs for Go functions exported to C.
1143///
1144/// The detector does **not** attempt full C pre-processing. Macros that
1145/// expand to `main` or `__attribute__((constructor))` are not detected —
1146/// this is an acceptable limitation at this stage.
1147#[derive(Debug, Default, Clone, Copy)]
1148pub struct CEntryDetector;
1149
1150impl EntryPointDetector for CEntryDetector {
1151 fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
1152 let mut entries = Vec::new();
1153 let Some(tree) = parse_with(source, &tree_sitter_c::LANGUAGE.into()) else {
1154 return entries;
1155 };
1156 let root = tree.root_node();
1157 let bytes = source.as_bytes();
1158
1159 // Collect cgo `//export NAME` comments first — they are not tied to
1160 // any AST function node; we scan the source text directly.
1161 c_collect_cgo_exports(source, file_path, &mut entries);
1162
1163 // Walk top-level declarations.
1164 let mut cursor = root.walk();
1165 for child in root.children(&mut cursor) {
1166 if child.kind() == "function_definition" {
1167 c_classify_function(&child, bytes, file_path, &mut entries);
1168 }
1169 }
1170
1171 entries
1172 }
1173}
1174
1175/// Classify a C `function_definition` node and emit entry points as
1176/// appropriate.
1177///
1178/// Handles three cases:
1179/// 1. Declarator is a `function_declarator` directly — covers `int main(…)`.
1180/// 2. Declarator is a `pointer_declarator` wrapping a `function_declarator`
1181/// — covers `int *main(void)`.
1182/// 3. The function has an `__attribute__((constructor))` specifier — emit
1183/// an additional [`EntryPointKind::Init`] entry.
1184fn c_classify_function(
1185 node: &tree_sitter::Node<'_>,
1186 bytes: &[u8],
1187 file_path: &Path,
1188 out: &mut Vec<EntryPoint>,
1189) {
1190 let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
1191
1192 // Resolve the innermost identifier name from the declarator.
1193 let Some(declarator) = node.child_by_field_name("declarator") else {
1194 return;
1195 };
1196 let Some(name) = c_resolve_function_name(&declarator, bytes) else {
1197 return;
1198 };
1199
1200 // Check for `__attribute__((constructor))` — the attribute_specifier
1201 // appears as a direct child of the function_definition (before the
1202 // type or declarator fields).
1203 let has_constructor_attr = c_has_constructor_attribute(node, bytes);
1204
1205 if name == "main" {
1206 out.push(EntryPoint {
1207 name: name.clone(),
1208 kind: EntryPointKind::Main,
1209 file_path: file_path.to_path_buf(),
1210 line,
1211 });
1212 }
1213
1214 if has_constructor_attr {
1215 out.push(EntryPoint {
1216 name,
1217 kind: EntryPointKind::Init,
1218 file_path: file_path.to_path_buf(),
1219 line,
1220 });
1221 }
1222}
1223
1224/// Walk a C declarator node to extract the innermost `identifier` that
1225/// names the function. Handles:
1226/// - `function_declarator` with `declarator: identifier` (direct case)
1227/// - `pointer_declarator` → `function_declarator` → `identifier` (pointer
1228/// return type)
1229///
1230/// Returns `None` if the name cannot be resolved (e.g., anonymous
1231/// declarations or grammar variants not covered here).
1232fn c_resolve_function_name(declarator: &tree_sitter::Node<'_>, bytes: &[u8]) -> Option<String> {
1233 match declarator.kind() {
1234 "function_declarator" => {
1235 // The inner declarator field holds the name.
1236 let inner = declarator.child_by_field_name("declarator")?;
1237 c_resolve_function_name(&inner, bytes)
1238 }
1239 "pointer_declarator" => {
1240 // Recurse: pointer_declarator wraps another declarator.
1241 let inner = declarator.child_by_field_name("declarator")?;
1242 c_resolve_function_name(&inner, bytes)
1243 }
1244 "identifier" => {
1245 let text =
1246 std::str::from_utf8(&bytes[declarator.start_byte()..declarator.end_byte()]).ok()?;
1247 Some(text.to_string())
1248 }
1249 _ => None,
1250 }
1251}
1252
1253/// Return true if the `function_definition` node has an
1254/// `__attribute__((constructor))` specifier as a direct child.
1255///
1256/// In tree-sitter-c the attribute appears as an `attribute_specifier`
1257/// child of `function_definition` (before the `type` field). The
1258/// `attribute_specifier` contains an `argument_list` whose first
1259/// element is an `identifier` with text `"constructor"`.
1260fn c_has_constructor_attribute(node: &tree_sitter::Node<'_>, bytes: &[u8]) -> bool {
1261 let mut cursor = node.walk();
1262 for child in node.children(&mut cursor) {
1263 if child.kind() != "attribute_specifier" {
1264 continue;
1265 }
1266 // Look for an argument_list child containing "constructor".
1267 let mut inner = child.walk();
1268 for grandchild in child.children(&mut inner) {
1269 if grandchild.kind() != "argument_list" {
1270 continue;
1271 }
1272 let mut arg_cur = grandchild.walk();
1273 for arg in grandchild.children(&mut arg_cur) {
1274 if arg.kind() == "identifier"
1275 && std::str::from_utf8(&bytes[arg.start_byte()..arg.end_byte()])
1276 .is_ok_and(|t| t == "constructor")
1277 {
1278 return true;
1279 }
1280 }
1281 }
1282 }
1283 false
1284}
1285
1286/// Scan the raw source for cgo-style `//export NAME` line comments and
1287/// emit [`EntryPointKind::Ffi`] entries for each exported name found.
1288///
1289/// cgo inserts these above the C stub for each Go function exported to C.
1290/// They are not associated with any AST node, so we scan the source text
1291/// directly. The format is exactly `//export <ident>` (no space after `//`).
1292fn c_collect_cgo_exports(source: &str, file_path: &Path, out: &mut Vec<EntryPoint>) {
1293 for (i, line) in source.lines().enumerate() {
1294 let trimmed = line.trim();
1295 if let Some(rest) = trimmed.strip_prefix("//export ") {
1296 let name = rest.trim();
1297 if !name.is_empty() && name.chars().all(|c| c.is_alphanumeric() || c == '_') {
1298 let line_num = u32::try_from(i + 1).unwrap_or(u32::MAX);
1299 out.push(EntryPoint {
1300 name: name.to_string(),
1301 kind: EntryPointKind::Ffi,
1302 file_path: file_path.to_path_buf(),
1303 line: line_num,
1304 });
1305 }
1306 }
1307 }
1308}
1309
1310// ---------------------------------------------------------------------------
1311// JavaScript / TypeScript detector
1312// ---------------------------------------------------------------------------
1313
1314/// JavaScript and TypeScript entry-point detector (I#70, Cycle 7 / 4.1.4).
1315///
1316/// Detects the following patterns:
1317/// - `export default function NAME(…)` → [`EntryPointKind::LibraryExport`]
1318/// - `export default class NAME` → [`EntryPointKind::LibraryExport`]
1319/// - `export const NAME = (…) => …` (named arrow export) →
1320/// [`EntryPointKind::LibraryExport`]
1321/// - `module.exports = …` → [`EntryPointKind::LibraryExport`] (named
1322/// `module.exports`)
1323/// - `exports.NAME = …` → [`EntryPointKind::LibraryExport`]
1324/// - `test(…)`, `it(…)`, `describe(…)` calls in `*.test.js` /
1325/// `*.spec.js` / `*.test.ts` / `*.spec.ts` files →
1326/// [`EntryPointKind::Test`]
1327///
1328/// The detector uses `tree-sitter-javascript` for all JS/TS sources.
1329/// TypeScript-specific syntax (type annotations, decorators) is handled
1330/// gracefully because tree-sitter-javascript degrades cleanly on TS.
1331#[derive(Debug, Default, Clone, Copy)]
1332pub struct JsEntryDetector;
1333
1334impl EntryPointDetector for JsEntryDetector {
1335 fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
1336 let mut entries = Vec::new();
1337 let Some(tree) = parse_with(source, &tree_sitter_javascript::LANGUAGE.into()) else {
1338 return entries;
1339 };
1340 let root = tree.root_node();
1341 let bytes = source.as_bytes();
1342
1343 let is_test_file = js_is_test_file(file_path);
1344
1345 // First pass: detect the CommonJS object-accumulation alias
1346 // `var app = exports = module.exports = {}` so the second pass
1347 // can recognise `app.METHOD = function...` as LibraryExport.
1348 let module_exports_alias = js_find_module_exports_alias(&root, bytes);
1349
1350 let mut cursor = root.walk();
1351 for child in root.children(&mut cursor) {
1352 match child.kind() {
1353 "export_statement" => {
1354 js_classify_export(&child, bytes, file_path, &mut entries);
1355 }
1356 "expression_statement" => {
1357 js_classify_expression_statement(
1358 &child,
1359 bytes,
1360 file_path,
1361 is_test_file,
1362 module_exports_alias.as_deref(),
1363 &mut entries,
1364 );
1365 }
1366 _ => {}
1367 }
1368 }
1369
1370 entries
1371 }
1372}
1373
/// Decide whether `file_path` looks like a JS/TS test file.
///
/// A path qualifies when its file name, lowercased, contains `.test.` or
/// `.spec.` (so `*.test.js`, `*.spec.ts`, `*.test.tsx`, ... all match), or
/// when any path component is exactly `__tests__`. Paths without a UTF-8
/// file name never qualify.
fn js_is_test_file(file_path: &Path) -> bool {
    let lowered = match file_path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name.to_ascii_lowercase(),
        None => return false,
    };
    // The marker must sit between two dots, i.e. before a final extension.
    let has_marker = [".test.", ".spec."]
        .iter()
        .any(|marker| lowered.contains(*marker));
    has_marker
        || file_path
            .components()
            .any(|part| part.as_os_str() == std::ffi::OsStr::new("__tests__"))
}
1393
1394/// Scan the top-level AST for the CommonJS object-accumulation pattern
1395/// `var ALIAS = exports = module.exports = {}` and return the local
1396/// variable name (`ALIAS`) if found.
1397///
1398/// The pattern appears in express's `lib/application.js`:
1399/// ```text
1400/// var app = exports = module.exports = {};
1401/// ```
1402/// Subsequent `app.use = function...`, `app.handle = function...` etc.
1403/// are all library exports whose reachability depends on knowing `app` is
1404/// the module.exports alias.
1405///
1406/// Returns the first alias found; returns `None` when the pattern is absent.
1407fn js_find_module_exports_alias(root: &tree_sitter::Node<'_>, bytes: &[u8]) -> Option<String> {
1408 let mut cursor = root.walk();
1409 for child in root.children(&mut cursor) {
1410 if child.kind() != "variable_declaration" {
1411 continue;
1412 }
1413 let mut vc = child.walk();
1414 for decl in child.children(&mut vc) {
1415 if decl.kind() != "variable_declarator" {
1416 continue;
1417 }
1418 let Some(name_node) = decl.child_by_field_name("name") else {
1419 continue;
1420 };
1421 let Ok(alias) =
1422 std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()])
1423 else {
1424 continue;
1425 };
1426 let Some(value) = decl.child_by_field_name("value") else {
1427 continue;
1428 };
1429 if js_assignment_reaches_module_exports(&value, bytes) {
1430 return Some(alias.to_string());
1431 }
1432 }
1433 }
1434 None
1435}
1436
1437/// Return true if `node` is an `assignment_expression` (possibly nested)
1438/// that has `module.exports` as one of its left-hand sides.
1439///
1440/// Handles both direct and chained forms:
1441/// - `module.exports = {}`
1442/// - `exports = module.exports = {}`
1443fn js_assignment_reaches_module_exports(node: &tree_sitter::Node<'_>, bytes: &[u8]) -> bool {
1444 if node.kind() != "assignment_expression" {
1445 return false;
1446 }
1447 if let Some(left) = node.child_by_field_name("left")
1448 && left.kind() == "member_expression"
1449 && let (Some(obj), Some(prop)) = (
1450 left.child_by_field_name("object"),
1451 left.child_by_field_name("property"),
1452 )
1453 {
1454 let obj_text = std::str::from_utf8(&bytes[obj.start_byte()..obj.end_byte()]).unwrap_or("");
1455 let prop_text =
1456 std::str::from_utf8(&bytes[prop.start_byte()..prop.end_byte()]).unwrap_or("");
1457 if obj_text == "module" && prop_text == "exports" {
1458 return true;
1459 }
1460 }
1461 // Recurse into RHS for chained assignments.
1462 node.child_by_field_name("right")
1463 .is_some_and(|right| js_assignment_reaches_module_exports(&right, bytes))
1464}
1465
1466/// Classify a top-level `export_statement` node and emit entry points.
1467///
1468/// Handles the patterns:
1469/// - `export default function NAME(…)` / `export default class NAME`
1470/// - `export const NAME = (…) => …` (named arrow-function export)
1471fn js_classify_export(
1472 node: &tree_sitter::Node<'_>,
1473 bytes: &[u8],
1474 file_path: &Path,
1475 out: &mut Vec<EntryPoint>,
1476) {
1477 let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
1478
1479 // Walk children to find the `declaration` field (or the `default`
1480 // keyword plus inline declaration). tree-sitter-javascript uses the
1481 // `declaration` field for named exports and places the value inline
1482 // after `default` for default exports.
1483 let mut cursor = node.walk();
1484 for child in node.children(&mut cursor) {
1485 match child.kind() {
1486 "function_declaration" => {
1487 // `export default function NAME(…)` or `export function NAME(…)`.
1488 if let Some(name_node) = child.child_by_field_name("name")
1489 && let Ok(name) =
1490 std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()])
1491 {
1492 out.push(EntryPoint {
1493 name: name.to_string(),
1494 kind: EntryPointKind::LibraryExport,
1495 file_path: file_path.to_path_buf(),
1496 line,
1497 });
1498 }
1499 }
1500 "class_declaration" => {
1501 // `export default class NAME` or `export class NAME`.
1502 if let Some(name_node) = child.child_by_field_name("name")
1503 && let Ok(name) =
1504 std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()])
1505 {
1506 out.push(EntryPoint {
1507 name: name.to_string(),
1508 kind: EntryPointKind::LibraryExport,
1509 file_path: file_path.to_path_buf(),
1510 line,
1511 });
1512 }
1513 }
1514 "lexical_declaration" => {
1515 // `export const NAME = (…) => …` — walk variable declarators.
1516 js_collect_lexical_exports(&child, bytes, file_path, line, out);
1517 }
1518 _ => {}
1519 }
1520 }
1521}
1522
1523/// Walk a `lexical_declaration` (const/let) inside an export statement
1524/// and emit LibraryExport entries for each variable whose value is an
1525/// arrow function or function expression.
1526fn js_collect_lexical_exports(
1527 node: &tree_sitter::Node<'_>,
1528 bytes: &[u8],
1529 file_path: &Path,
1530 line: u32,
1531 out: &mut Vec<EntryPoint>,
1532) {
1533 let mut cursor = node.walk();
1534 for child in node.children(&mut cursor) {
1535 if child.kind() != "variable_declarator" {
1536 continue;
1537 }
1538 let Some(name_node) = child.child_by_field_name("name") else {
1539 continue;
1540 };
1541 let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()])
1542 else {
1543 continue;
1544 };
1545 // Only emit if the RHS is a function-like value (arrow or
1546 // function expression). Non-function exports (e.g. string
1547 // constants) are not entry points.
1548 if child
1549 .child_by_field_name("value")
1550 .is_some_and(|v| matches!(v.kind(), "arrow_function" | "function_expression"))
1551 {
1552 out.push(EntryPoint {
1553 name: name.to_string(),
1554 kind: EntryPointKind::LibraryExport,
1555 file_path: file_path.to_path_buf(),
1556 line,
1557 });
1558 }
1559 }
1560}
1561
1562/// Classify a top-level `expression_statement` node.
1563///
1564/// Handles:
1565/// - `module.exports = …` — emits a LibraryExport entry named `module.exports`.
1566/// - `exports.NAME = …` — emits a LibraryExport entry named `NAME`.
1567/// - `exports = module.exports = VALUE` — chained assignment; emits
1568/// LibraryExport for the value name when it is an identifier.
1569/// - `ALIAS.METHOD = function...` — emits LibraryExport named `METHOD`
1570/// when `ALIAS` is the known `module.exports` alias (B-0010).
1571/// - `test(…)` / `it(…)` / `describe(…)` — emits a Test entry when in a
1572/// test file.
1573fn js_classify_expression_statement(
1574 node: &tree_sitter::Node<'_>,
1575 bytes: &[u8],
1576 file_path: &Path,
1577 is_test_file: bool,
1578 module_exports_alias: Option<&str>,
1579 out: &mut Vec<EntryPoint>,
1580) {
1581 let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
1582
1583 let mut cursor = node.walk();
1584 for child in node.children(&mut cursor) {
1585 match child.kind() {
1586 "assignment_expression" => {
1587 js_classify_assignment(&child, bytes, file_path, line, module_exports_alias, out);
1588 }
1589 "call_expression" if is_test_file => {
1590 js_classify_test_call(&child, bytes, file_path, line, out);
1591 }
1592 _ => {}
1593 }
1594 }
1595}
1596
1597/// Classify an `assignment_expression` for CommonJS export patterns.
1598///
1599/// Emits:
1600/// - `module.exports = …` → LibraryExport named `"module.exports"`
1601/// - `exports.NAME = …` → LibraryExport named `NAME`
1602/// - `exports = module.exports = VALUE` → recurses, seeding VALUE name
1603/// - `ALIAS.METHOD = function...` → LibraryExport named `METHOD` when
1604/// `ALIAS` is the known module.exports alias (B-0010)
1605fn js_classify_assignment(
1606 node: &tree_sitter::Node<'_>,
1607 bytes: &[u8],
1608 file_path: &Path,
1609 line: u32,
1610 module_exports_alias: Option<&str>,
1611 out: &mut Vec<EntryPoint>,
1612) {
1613 let Some(left) = node.child_by_field_name("left") else {
1614 return;
1615 };
1616
1617 // Handle chained assignment: `exports = module.exports = VALUE`
1618 // The left side is a bare `exports` identifier, and the RHS is
1619 // another assignment_expression (or a direct value).
1620 if left.kind() == "identifier" {
1621 let Ok(left_name) = std::str::from_utf8(&bytes[left.start_byte()..left.end_byte()]) else {
1622 return;
1623 };
1624 if left_name == "exports"
1625 && let Some(right) = node.child_by_field_name("right")
1626 {
1627 if right.kind() == "assignment_expression" {
1628 // Recurse: `exports = module.exports = VALUE`
1629 js_classify_assignment(&right, bytes, file_path, line, module_exports_alias, out);
1630 } else {
1631 // `exports = VALUE` — emit VALUE if it is a named fn.
1632 js_emit_identifier_as_export(&right, bytes, file_path, line, out);
1633 }
1634 }
1635 return;
1636 }
1637
1638 if left.kind() != "member_expression" {
1639 return;
1640 }
1641 let Some(obj_node) = left.child_by_field_name("object") else {
1642 return;
1643 };
1644 let Some(prop_node) = left.child_by_field_name("property") else {
1645 return;
1646 };
1647 let Ok(obj) = std::str::from_utf8(&bytes[obj_node.start_byte()..obj_node.end_byte()]) else {
1648 return;
1649 };
1650 let Ok(prop) = std::str::from_utf8(&bytes[prop_node.start_byte()..prop_node.end_byte()]) else {
1651 return;
1652 };
1653
1654 if obj == "module" && prop == "exports" {
1655 // `module.exports = ...` — the whole module is exported.
1656 out.push(EntryPoint {
1657 name: "module.exports".to_string(),
1658 kind: EntryPointKind::LibraryExport,
1659 file_path: file_path.to_path_buf(),
1660 line,
1661 });
1662 } else if obj == "exports" {
1663 // `exports.NAME = ...` — a named CommonJS export.
1664 out.push(EntryPoint {
1665 name: prop.to_string(),
1666 kind: EntryPointKind::LibraryExport,
1667 file_path: file_path.to_path_buf(),
1668 line,
1669 });
1670 } else if module_exports_alias.is_some_and(|alias| alias == obj) {
1671 // `ALIAS.METHOD = function...` — CommonJS object-accumulation.
1672 // Only emit when the RHS is a function-like value.
1673 let is_fn = node.child_by_field_name("right").is_some_and(|v| {
1674 matches!(
1675 v.kind(),
1676 "function_expression" | "arrow_function" | "function_declaration"
1677 )
1678 });
1679 if is_fn {
1680 out.push(EntryPoint {
1681 name: prop.to_string(),
1682 kind: EntryPointKind::LibraryExport,
1683 file_path: file_path.to_path_buf(),
1684 line,
1685 });
1686 }
1687 }
1688}
1689
1690/// Emit a LibraryExport entry when `node` is an identifier naming a
1691/// function or factory (used for `exports = VALUE` assignments).
1692fn js_emit_identifier_as_export(
1693 node: &tree_sitter::Node<'_>,
1694 bytes: &[u8],
1695 file_path: &Path,
1696 line: u32,
1697 out: &mut Vec<EntryPoint>,
1698) {
1699 if node.kind() == "identifier"
1700 && let Ok(name) = std::str::from_utf8(&bytes[node.start_byte()..node.end_byte()])
1701 && !name.is_empty()
1702 {
1703 out.push(EntryPoint {
1704 name: name.to_string(),
1705 kind: EntryPointKind::LibraryExport,
1706 file_path: file_path.to_path_buf(),
1707 line,
1708 });
1709 }
1710}
1711
1712/// Classify a `call_expression` for Jest/Vitest test runner patterns.
1713///
1714/// Emits Test entries for `test(…)`, `it(…)`, and `describe(…)` calls.
1715/// The first string argument (if present) becomes the entry name; falls
1716/// back to the call function name when the first argument is not a string.
1717fn js_classify_test_call(
1718 node: &tree_sitter::Node<'_>,
1719 bytes: &[u8],
1720 file_path: &Path,
1721 line: u32,
1722 out: &mut Vec<EntryPoint>,
1723) {
1724 let Some(func_node) = node.child_by_field_name("function") else {
1725 return;
1726 };
1727 let Ok(func_name) = std::str::from_utf8(&bytes[func_node.start_byte()..func_node.end_byte()])
1728 else {
1729 return;
1730 };
1731 if !matches!(func_name, "test" | "it" | "describe") {
1732 return;
1733 }
1734
1735 // Try to extract the first string argument as the test name.
1736 let entry_name = node
1737 .child_by_field_name("arguments")
1738 .and_then(|args| {
1739 let mut c = args.walk();
1740 args.children(&mut c).find(|ch| ch.kind() == "string")
1741 })
1742 .and_then(|s| {
1743 // String node: look for a string_fragment child.
1744 let mut c = s.walk();
1745 s.children(&mut c).find(|ch| ch.kind() == "string_fragment")
1746 })
1747 .and_then(|frag| {
1748 std::str::from_utf8(&bytes[frag.start_byte()..frag.end_byte()])
1749 .ok()
1750 .map(ToString::to_string)
1751 })
1752 .unwrap_or_else(|| func_name.to_string());
1753
1754 out.push(EntryPoint {
1755 name: entry_name,
1756 kind: EntryPointKind::Test,
1757 file_path: file_path.to_path_buf(),
1758 line,
1759 });
1760}
1761
1762// ---------------------------------------------------------------------------
1763// Java detector (B-0009, Cycle 9 / 4.1.5)
1764// ---------------------------------------------------------------------------
1765
1766/// Java entry-point detector (B-0009, Cycle 9).
1767///
1768/// Detects the entry-point shapes that anchor JVM-ecosystem call graphs.
1769/// Before this detector, `find_dead_code` returned `dead_fraction = 1.0`
1770/// on every Java codebase — there were no seeds, so BFS reachability
1771/// reached nothing.
1772///
1773/// Patterns recognised:
1774/// - `public static void main(String[] args)` → [`EntryPointKind::Main`]
1775/// - JUnit method annotations `@Test`, `@ParameterizedTest`,
1776/// `@RepeatedTest`, `@TestFactory` → [`EntryPointKind::Test`]
1777/// - Spring DI / stereotype class annotations `@Component`, `@Service`,
1778/// `@Repository`, `@Controller`, `@RestController`, `@Configuration`,
1779/// `@AutoConfiguration` → [`EntryPointKind::LibraryExport`]
1780/// - Spring `@Bean`-annotated methods → [`EntryPointKind::LibraryExport`]
1781/// (each bean is a library export from the DI container's perspective)
1782/// - `@SpringBootApplication`, `@SpringBootTest` annotated classes →
1783/// [`EntryPointKind::FrameworkDispatched`] (Spring container drives
1784/// their lifecycle; static call graph cannot see the invocation)
1785///
1786/// Annotation matching is by the trailing identifier — both
1787/// `@Component` and `@org.springframework.stereotype.Component` are
1788/// recognised. The detector treats `marker_annotation` (`@Foo`) and
1789/// `annotation` (`@Foo(args)`) uniformly.
1790#[derive(Debug, Default, Clone, Copy)]
1791pub struct JavaEntryDetector;
1792
1793impl EntryPointDetector for JavaEntryDetector {
1794 fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
1795 let mut entries = Vec::new();
1796 let Some(tree) = parse_with(source, &tree_sitter_java::LANGUAGE.into()) else {
1797 return entries;
1798 };
1799 let root = tree.root_node();
1800 let bytes = source.as_bytes();
1801 visit_java_node(&root, bytes, file_path, &mut entries);
1802 entries
1803 }
1804}
1805
1806/// Recursively walk the Java AST emitting entry points for matching
1807/// class and method declarations.
1808fn visit_java_node(node: &Node<'_>, bytes: &[u8], file_path: &Path, out: &mut Vec<EntryPoint>) {
1809 match node.kind() {
1810 "class_declaration" | "interface_declaration" | "enum_declaration" => {
1811 java_classify_class(node, bytes, file_path, out);
1812 }
1813 "method_declaration" => {
1814 java_classify_method(node, bytes, file_path, out);
1815 }
1816 _ => {}
1817 }
1818 let mut cursor = node.walk();
1819 for child in node.children(&mut cursor) {
1820 visit_java_node(&child, bytes, file_path, out);
1821 }
1822}
1823
/// Annotations that mark a class as a Spring-framework-dispatched
/// application entry whose lifecycle the container drives.
///
/// Entries are bare (unqualified) annotation names. `java_classify_class`
/// compares them against the trailing identifiers produced by
/// `java_collect_annotation_names` and emits
/// [`EntryPointKind::FrameworkDispatched`] when any of them is present.
const JAVA_FRAMEWORK_CLASS_ANNOTATIONS: &[&str] = &[
    "SpringBootApplication",
    "SpringBootTest",
    "EnableAutoConfiguration",
];
1831
/// Annotations that mark a class as a Spring DI bean / library-export
/// surface. The Spring container instantiates these even when no
/// in-source caller does.
///
/// Entries are bare (unqualified) annotation names. `java_classify_class`
/// emits [`EntryPointKind::LibraryExport`] when a declaration carries any
/// of them.
const JAVA_STEREOTYPE_CLASS_ANNOTATIONS: &[&str] = &[
    "Component",
    "Service",
    "Repository",
    "Controller",
    "RestController",
    "Configuration",
    "AutoConfiguration",
    "ConfigurationProperties",
];
1845
/// Annotations that mark a method as a JUnit test entry.
///
/// Besides the test annotations themselves, the list contains the
/// lifecycle hooks (`BeforeEach`, `AfterEach`, `BeforeAll`, `AfterAll`):
/// `java_classify_method` reports a method carrying any entry of this
/// list as [`EntryPointKind::Test`]. Entries are bare (unqualified)
/// annotation names.
const JAVA_TEST_METHOD_ANNOTATIONS: &[&str] = &[
    "Test",
    "ParameterizedTest",
    "RepeatedTest",
    "TestFactory",
    "TestTemplate",
    "BeforeEach",
    "AfterEach",
    "BeforeAll",
    "AfterAll",
];
1858
1859/// Classify a Java class/interface/enum declaration.
1860fn java_classify_class(node: &Node<'_>, bytes: &[u8], file_path: &Path, out: &mut Vec<EntryPoint>) {
1861 let Some(name_node) = node.child_by_field_name("name") else {
1862 return;
1863 };
1864 let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()]) else {
1865 return;
1866 };
1867 let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
1868 let annotations = java_collect_annotation_names(node, bytes);
1869
1870 if annotations
1871 .iter()
1872 .any(|a| JAVA_FRAMEWORK_CLASS_ANNOTATIONS.contains(&a.as_str()))
1873 {
1874 out.push(EntryPoint {
1875 name: name.to_string(),
1876 kind: EntryPointKind::FrameworkDispatched,
1877 file_path: file_path.to_path_buf(),
1878 line,
1879 });
1880 }
1881
1882 if annotations
1883 .iter()
1884 .any(|a| JAVA_STEREOTYPE_CLASS_ANNOTATIONS.contains(&a.as_str()))
1885 {
1886 out.push(EntryPoint {
1887 name: name.to_string(),
1888 kind: EntryPointKind::LibraryExport,
1889 file_path: file_path.to_path_buf(),
1890 line,
1891 });
1892 }
1893}
1894
1895/// Classify a Java method declaration.
1896fn java_classify_method(
1897 node: &Node<'_>,
1898 bytes: &[u8],
1899 file_path: &Path,
1900 out: &mut Vec<EntryPoint>,
1901) {
1902 let Some(name_node) = node.child_by_field_name("name") else {
1903 return;
1904 };
1905 let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()]) else {
1906 return;
1907 };
1908 let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
1909 let annotations = java_collect_annotation_names(node, bytes);
1910
1911 // `public static void main(String[] args)` — the JVM entry contract.
1912 // tree-sitter-java places `public`, `static`, etc. as direct children
1913 // of a `modifiers` node; check both modifier text and method shape.
1914 if name == "main" && java_method_is_public_static(node, bytes) {
1915 out.push(EntryPoint {
1916 name: name.to_string(),
1917 kind: EntryPointKind::Main,
1918 file_path: file_path.to_path_buf(),
1919 line,
1920 });
1921 }
1922
1923 if annotations
1924 .iter()
1925 .any(|a| JAVA_TEST_METHOD_ANNOTATIONS.contains(&a.as_str()))
1926 {
1927 out.push(EntryPoint {
1928 name: name.to_string(),
1929 kind: EntryPointKind::Test,
1930 file_path: file_path.to_path_buf(),
1931 line,
1932 });
1933 }
1934
1935 // @Bean methods are library-export surfaces — Spring publishes them
1936 // into the application context regardless of in-source callers.
1937 if annotations.iter().any(|a| a == "Bean") {
1938 out.push(EntryPoint {
1939 name: name.to_string(),
1940 kind: EntryPointKind::LibraryExport,
1941 file_path: file_path.to_path_buf(),
1942 line,
1943 });
1944 }
1945}
1946
1947/// Collect annotation identifiers from a declaration's `modifiers` child.
1948///
1949/// tree-sitter-java places annotations inside a `modifiers` node child
1950/// of the declaration. Each annotation is either:
1951/// - `marker_annotation` — bare `@Foo`, with a `name` field that is an
1952/// `identifier` or `scoped_identifier`.
1953/// - `annotation` — `@Foo(args)`, same `name` field shape.
1954///
1955/// We extract the trailing identifier from the annotation name so that
1956/// both `@Component` and `@org.springframework.stereotype.Component`
1957/// resolve to `"Component"`.
1958fn java_collect_annotation_names(node: &Node<'_>, bytes: &[u8]) -> Vec<String> {
1959 let mut names = Vec::new();
1960 let Some(modifiers) = java_find_modifiers_child(node) else {
1961 return names;
1962 };
1963 let mut cursor = modifiers.walk();
1964 for child in modifiers.children(&mut cursor) {
1965 match child.kind() {
1966 "marker_annotation" | "annotation" => {
1967 if let Some(name_node) = child.child_by_field_name("name")
1968 && let Some(ident) = java_annotation_trailing_identifier(&name_node, bytes)
1969 {
1970 names.push(ident);
1971 }
1972 }
1973 _ => {}
1974 }
1975 }
1976 names
1977}
1978
1979/// Find the `modifiers` child of a Java declaration node, if present.
1980fn java_find_modifiers_child<'tree>(node: &Node<'tree>) -> Option<Node<'tree>> {
1981 let mut cursor = node.walk();
1982 node.children(&mut cursor)
1983 .find(|&child| child.kind() == "modifiers")
1984}
1985
1986/// Extract the trailing identifier from a Java annotation name node.
1987///
1988/// Handles two shapes:
1989/// - `(identifier)` — bare `@Foo`; returns `"Foo"`.
1990/// - `(scoped_identifier scope: ... name: (identifier))` — fully qualified
1991/// `@a.b.Foo`; returns `"Foo"`. Falls back to walking children to find
1992/// the last `identifier` if the `name` field is not present.
1993fn java_annotation_trailing_identifier(name_node: &Node<'_>, bytes: &[u8]) -> Option<String> {
1994 match name_node.kind() {
1995 "identifier" => std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()])
1996 .ok()
1997 .map(ToString::to_string),
1998 "scoped_identifier" => {
1999 // Prefer the `name` field; fall back to the last `identifier`
2000 // child if the field is unavailable on this grammar version.
2001 if let Some(name) = name_node.child_by_field_name("name") {
2002 return std::str::from_utf8(&bytes[name.start_byte()..name.end_byte()])
2003 .ok()
2004 .map(ToString::to_string);
2005 }
2006 let mut last: Option<String> = None;
2007 let mut cursor = name_node.walk();
2008 for child in name_node.children(&mut cursor) {
2009 if child.kind() == "identifier"
2010 && let Ok(text) =
2011 std::str::from_utf8(&bytes[child.start_byte()..child.end_byte()])
2012 {
2013 last = Some(text.to_string());
2014 }
2015 }
2016 last
2017 }
2018 _ => None,
2019 }
2020}
2021
2022/// Return true if a Java `method_declaration` has both `public` and
2023/// `static` modifiers — required for the JVM `main` entry contract.
2024fn java_method_is_public_static(node: &Node<'_>, bytes: &[u8]) -> bool {
2025 let Some(modifiers) = java_find_modifiers_child(node) else {
2026 return false;
2027 };
2028 let text =
2029 std::str::from_utf8(&bytes[modifiers.start_byte()..modifiers.end_byte()]).unwrap_or("");
2030 // tree-sitter-java emits `public` and `static` as unnamed children
2031 // (keyword tokens) inside `modifiers`. The whole-text contains-check
2032 // is a reliable proxy and avoids enumerating the grammar's keyword
2033 // node kinds, which differ across grammar versions.
2034 text.contains("public") && text.contains("static")
2035}
2036
2037// ---------------------------------------------------------------------------
2038// Kotlin detector (B-0009, Cycle 9 / 4.1.5)
2039// ---------------------------------------------------------------------------
2040
2041/// Kotlin entry-point detector (B-0009, Cycle 9).
2042///
2043/// Detects:
2044/// - Top-level `fun main(...)` → [`EntryPointKind::Main`]
2045/// - Classes annotated with `@Component` / `@Service` / `@Repository` /
2046/// `@Controller` / `@RestController` / `@Configuration` →
2047/// [`EntryPointKind::LibraryExport`]
2048/// - Classes annotated with `@SpringBootApplication` /
2049/// `@SpringBootTest` → [`EntryPointKind::FrameworkDispatched`]
2050/// - Functions annotated with `@Test`, `@ParameterizedTest`,
2051/// `@RepeatedTest` → [`EntryPointKind::Test`]
2052/// - Classes whose names end in `Test` or `Spec` (Spek / KotlinTest
2053/// convention) → [`EntryPointKind::Test`]
2054///
2055/// Uses `tree_sitter_kotlin_ng` — the same grammar wired into the
2056/// chunker via `crates/ripvec-core/src/languages.rs`. Annotations are
2057/// matched by trailing identifier (so `@Component` and
2058/// `@org.springframework.stereotype.Component` both resolve).
2059#[derive(Debug, Default, Clone, Copy)]
2060pub struct KotlinEntryDetector;
2061
2062impl EntryPointDetector for KotlinEntryDetector {
2063 fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
2064 let mut entries = Vec::new();
2065 let Some(tree) = parse_with(source, &tree_sitter_kotlin_ng::LANGUAGE.into()) else {
2066 return entries;
2067 };
2068 let root = tree.root_node();
2069 let bytes = source.as_bytes();
2070
2071 // Top-level functions: `fun main(...)` is the JVM entry contract.
2072 // Walk only the root's direct children to enforce top-level scope.
2073 let mut cursor = root.walk();
2074 for child in root.children(&mut cursor) {
2075 if child.kind() == "function_declaration" {
2076 kotlin_classify_top_level_function(&child, bytes, file_path, &mut entries);
2077 }
2078 }
2079
2080 // Class / object declarations + nested function declarations.
2081 // Use full-AST walk so annotated nested classes and methods are
2082 // also seeded.
2083 visit_kotlin_node(&root, bytes, file_path, &mut entries);
2084
2085 entries
2086 }
2087}
2088
/// Annotations that mark a Kotlin class as Spring-framework-dispatched.
///
/// Entries are bare (unqualified) annotation names. `kotlin_classify_class`
/// emits [`EntryPointKind::FrameworkDispatched`] when a class or object
/// declaration carries any of them.
const KOTLIN_FRAMEWORK_CLASS_ANNOTATIONS: &[&str] = &[
    "SpringBootApplication",
    "SpringBootTest",
    "EnableAutoConfiguration",
];
2095
/// Annotations that mark a Kotlin class as a DI / library-export surface.
///
/// Entries are bare (unqualified) annotation names. `kotlin_classify_class`
/// emits [`EntryPointKind::LibraryExport`] when a class or object
/// declaration carries any of them.
const KOTLIN_STEREOTYPE_CLASS_ANNOTATIONS: &[&str] = &[
    "Component",
    "Service",
    "Repository",
    "Controller",
    "RestController",
    "Configuration",
    "AutoConfiguration",
    "ConfigurationProperties",
];
2107
/// Annotations that mark a Kotlin function as a test entry.
///
/// Mirrors the Java table: besides the JUnit test annotations it lists the
/// lifecycle hooks (`BeforeEach`, `AfterEach`, `BeforeAll`, `AfterAll`).
/// The test framework invokes those functions reflectively, so without a
/// seed they — and everything only they call — would be reported as dead
/// code. Entries are bare (unqualified) annotation names;
/// `kotlin_classify_function_annotations` emits [`EntryPointKind::Test`]
/// for a function carrying any of them.
const KOTLIN_TEST_FUNCTION_ANNOTATIONS: &[&str] = &[
    "Test",
    "ParameterizedTest",
    "RepeatedTest",
    "TestFactory",
    "TestTemplate",
    "BeforeEach",
    "AfterEach",
    "BeforeAll",
    "AfterAll",
];
2116
2117/// Recursively walk the Kotlin AST emitting entry points for class /
2118/// object / function declarations.
2119fn visit_kotlin_node(node: &Node<'_>, bytes: &[u8], file_path: &Path, out: &mut Vec<EntryPoint>) {
2120 match node.kind() {
2121 "class_declaration" | "object_declaration" => {
2122 kotlin_classify_class(node, bytes, file_path, out);
2123 }
2124 "function_declaration" => {
2125 kotlin_classify_function_annotations(node, bytes, file_path, out);
2126 }
2127 _ => {}
2128 }
2129 let mut cursor = node.walk();
2130 for child in node.children(&mut cursor) {
2131 visit_kotlin_node(&child, bytes, file_path, out);
2132 }
2133}
2134
2135/// Classify a top-level Kotlin function declaration.
2136///
2137/// Emits [`EntryPointKind::Main`] for any top-level `fun main`, whether
2138/// declared with `()`, `(args: Array<String>)`, or the Kotlin 1.3+
2139/// suspended `suspend fun main(...)` shape.
2140fn kotlin_classify_top_level_function(
2141 node: &Node<'_>,
2142 bytes: &[u8],
2143 file_path: &Path,
2144 out: &mut Vec<EntryPoint>,
2145) {
2146 let Some(name_node) = node.child_by_field_name("name") else {
2147 return;
2148 };
2149 let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()]) else {
2150 return;
2151 };
2152 let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
2153 if name == "main" {
2154 out.push(EntryPoint {
2155 name: name.to_string(),
2156 kind: EntryPointKind::Main,
2157 file_path: file_path.to_path_buf(),
2158 line,
2159 });
2160 }
2161}
2162
2163/// Classify annotations on a Kotlin function (anywhere in the tree).
2164///
2165/// Emits [`EntryPointKind::Test`] for `@Test` family annotations.
2166fn kotlin_classify_function_annotations(
2167 node: &Node<'_>,
2168 bytes: &[u8],
2169 file_path: &Path,
2170 out: &mut Vec<EntryPoint>,
2171) {
2172 let Some(name_node) = node.child_by_field_name("name") else {
2173 return;
2174 };
2175 let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()]) else {
2176 return;
2177 };
2178 let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
2179 let annotations = kotlin_collect_annotation_names(node, bytes);
2180 if annotations
2181 .iter()
2182 .any(|a| KOTLIN_TEST_FUNCTION_ANNOTATIONS.contains(&a.as_str()))
2183 {
2184 out.push(EntryPoint {
2185 name: name.to_string(),
2186 kind: EntryPointKind::Test,
2187 file_path: file_path.to_path_buf(),
2188 line,
2189 });
2190 }
2191}
2192
2193/// Classify a Kotlin class / object declaration.
2194fn kotlin_classify_class(
2195 node: &Node<'_>,
2196 bytes: &[u8],
2197 file_path: &Path,
2198 out: &mut Vec<EntryPoint>,
2199) {
2200 let Some(name_node) = node.child_by_field_name("name") else {
2201 return;
2202 };
2203 let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()]) else {
2204 return;
2205 };
2206 let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
2207 let annotations = kotlin_collect_annotation_names(node, bytes);
2208
2209 if annotations
2210 .iter()
2211 .any(|a| KOTLIN_FRAMEWORK_CLASS_ANNOTATIONS.contains(&a.as_str()))
2212 {
2213 out.push(EntryPoint {
2214 name: name.to_string(),
2215 kind: EntryPointKind::FrameworkDispatched,
2216 file_path: file_path.to_path_buf(),
2217 line,
2218 });
2219 }
2220
2221 if annotations
2222 .iter()
2223 .any(|a| KOTLIN_STEREOTYPE_CLASS_ANNOTATIONS.contains(&a.as_str()))
2224 {
2225 out.push(EntryPoint {
2226 name: name.to_string(),
2227 kind: EntryPointKind::LibraryExport,
2228 file_path: file_path.to_path_buf(),
2229 line,
2230 });
2231 }
2232
2233 // Convention-based: classes named `*Test` or `*Spec` are test entries
2234 // even without an explicit annotation — covers Spek / KotlinTest /
2235 // many Spring projects' naming patterns.
2236 if (name.ends_with("Test") || name.ends_with("Spec")) && name.len() > 4 {
2237 out.push(EntryPoint {
2238 name: name.to_string(),
2239 kind: EntryPointKind::Test,
2240 file_path: file_path.to_path_buf(),
2241 line,
2242 });
2243 }
2244}
2245
2246/// Collect annotation identifiers preceding a Kotlin declaration.
2247///
2248/// In `tree-sitter-kotlin-ng` annotations appear inside a `modifiers`
2249/// (or `modifier_list`) child of the declaration. The annotation node
2250/// kinds vary by grammar version — we accept `annotation` and
2251/// `single_annotation` and pull the trailing identifier from whichever
2252/// child contains the annotation's user_type / identifier.
2253fn kotlin_collect_annotation_names(node: &Node<'_>, bytes: &[u8]) -> Vec<String> {
2254 let mut names = Vec::new();
2255 let mut cursor = node.walk();
2256 for child in node.children(&mut cursor) {
2257 match child.kind() {
2258 "modifiers" | "modifier_list" => {
2259 kotlin_collect_annotations_in(&child, bytes, &mut names);
2260 }
2261 // Some grammar variants attach annotations directly as
2262 // siblings of the declaration body rather than inside a
2263 // `modifiers` node — handle that case too.
2264 "annotation" | "single_annotation" => {
2265 if let Some(ident) = kotlin_annotation_identifier(&child, bytes) {
2266 names.push(ident);
2267 }
2268 }
2269 _ => {}
2270 }
2271 }
2272 names
2273}
2274
2275/// Walk into a Kotlin `modifiers` node and collect annotation names.
2276fn kotlin_collect_annotations_in(node: &Node<'_>, bytes: &[u8], out: &mut Vec<String>) {
2277 let mut cursor = node.walk();
2278 for child in node.children(&mut cursor) {
2279 match child.kind() {
2280 "annotation" | "single_annotation" => {
2281 if let Some(ident) = kotlin_annotation_identifier(&child, bytes) {
2282 out.push(ident);
2283 }
2284 }
2285 _ => {}
2286 }
2287 }
2288}
2289
2290/// Extract the trailing identifier from a Kotlin annotation node.
2291///
2292/// Kotlin annotations parse as `@<user_type>` where `user_type` is a
2293/// dot-separated chain of `simple_user_type` nodes each containing a
2294/// `(simple_identifier)` (or just `(identifier)` in `tree-sitter-kotlin-ng`).
2295/// We walk the annotation subtree and return the last identifier found —
2296/// matching `@Component` and `@org.springframework.stereotype.Component`
2297/// uniformly to `"Component"`.
2298fn kotlin_annotation_identifier(node: &Node<'_>, bytes: &[u8]) -> Option<String> {
2299 let mut last: Option<String> = None;
2300 let mut stack: Vec<Node<'_>> = vec![*node];
2301 while let Some(n) = stack.pop() {
2302 if matches!(n.kind(), "identifier" | "simple_identifier")
2303 && let Ok(text) = std::str::from_utf8(&bytes[n.start_byte()..n.end_byte()])
2304 {
2305 // Skip the `@` token itself if it happens to be tokenised as
2306 // an identifier (it isn't in practice, but be defensive).
2307 if !text.is_empty() && text != "@" {
2308 last = Some(text.to_string());
2309 }
2310 }
2311 let mut cursor = n.walk();
2312 for child in n.children(&mut cursor) {
2313 stack.push(child);
2314 }
2315 }
2316 last
2317}
2318
2319// ---------------------------------------------------------------------------
2320// Dispatch
2321// ---------------------------------------------------------------------------
2322
2323/// Return the entry-point detector for a language identifier.
2324///
2325/// `language` is the lowercased language name as used in
2326/// `crate::languages` (`"rust"`, `"python"`, `"go"`, `"c"`, `"javascript"`,
2327/// `"java"`, `"kotlin"`).
2328/// Returns `None` for any language not yet covered by this module.
2329///
2330/// File-extension dispatch (`"rs"`, `"py"`, `"pyi"`, `"go"`, `"c"`,
2331/// `"h"`, `"js"`, `"jsx"`, `"ts"`, `"tsx"`, `"java"`, `"kt"`, `"kts"`) is
2332/// also accepted for caller convenience — the BFS walk in X2 carries
2333/// extensions, not language names, through its per-file loop.
2334#[must_use]
2335pub fn detector_for(language: &str) -> Option<Box<dyn EntryPointDetector>> {
2336 match language {
2337 "rust" | "rs" => Some(Box::new(RustEntryDetector)),
2338 "python" | "py" | "pyi" => Some(Box::new(PythonEntryDetector)),
2339 "go" => Some(Box::new(GoEntryDetector)),
2340 "c" | "h" => Some(Box::new(CEntryDetector)),
2341 "javascript" | "js" | "jsx" | "typescript" | "ts" | "tsx" => {
2342 Some(Box::new(JsEntryDetector))
2343 }
2344 "java" => Some(Box::new(JavaEntryDetector)),
2345 "kotlin" | "kt" | "kts" => Some(Box::new(KotlinEntryDetector)),
2346 _ => None,
2347 }
2348}
2349
2350// ---------------------------------------------------------------------------
2351// Summary aggregation (4.1.1 Front A node A4)
2352// ---------------------------------------------------------------------------
2353
2354/// Render a per-kind count map as a sorted list of human-friendly summary
2355/// lines for the `find_dead_code` MCP tool's `entry_points_detected`
2356/// field.
2357///
2358/// Each line follows the shape `"<count> <label>"` — e.g. `"12
2359/// framework-dispatched (MCP tools)"`, `"3 library exports"`. The output
2360/// is sorted lexicographically so the surface order is deterministic
2361/// across calls.
2362///
2363/// Lives in `ripvec-core` so the MCP tool wrapper and any future CLI
2364/// consumer share a single labelling convention. Added in 4.1.1 (Wave 1
2365/// Front A node A4) alongside [`EntryPointKind::FrameworkDispatched`].
2366#[must_use]
2367pub fn summarize_entry_point_kinds<S: std::hash::BuildHasher>(
2368 counts: &std::collections::HashMap<EntryPointKind, usize, S>,
2369) -> Vec<String> {
2370 let mut summary: Vec<String> = counts
2371 .iter()
2372 .map(|(kind, count)| format!("{count} {label}", label = label_for_kind(*kind)))
2373 .collect();
2374 summary.sort();
2375 summary
2376}
2377
2378/// Return the human-friendly label used in the
2379/// [`summarize_entry_point_kinds`] output for a given variant.
2380///
2381/// Exposed `pub` so external consumers can format individual kinds
2382/// without rebuilding a count map.
2383#[must_use]
2384pub fn label_for_kind(kind: EntryPointKind) -> &'static str {
2385 match kind {
2386 EntryPointKind::Main => "main",
2387 EntryPointKind::LibraryExport => "library exports",
2388 EntryPointKind::Test => "tests",
2389 EntryPointKind::Ffi => "FFI",
2390 EntryPointKind::ProcMacro => "proc-macros",
2391 EntryPointKind::Init => "init functions",
2392 EntryPointKind::BuildScript => "build scripts",
2393 EntryPointKind::FrameworkDispatched => "framework-dispatched (MCP tools)",
2394 }
2395}
2396
2397// ---------------------------------------------------------------------------
2398// Internal helpers
2399// ---------------------------------------------------------------------------
2400
2401/// Parse `source` with the given tree-sitter `Language`. Returns `None`
2402/// if the parser cannot be configured or the parse fails.
2403fn parse_with(source: &str, language: &tree_sitter::Language) -> Option<tree_sitter::Tree> {
2404 let mut parser = Parser::new();
2405 parser.set_language(language).ok()?;
2406 parser.parse(source, None)
2407}
2408
2409// Unused-but-keep-for-X2 helpers. These ride alongside the detector
2410// implementations so X2 has a single import point for the BFS-time
2411// helpers.
2412//
2413// `query_match_lines` returns the 1-based line of every match of a
2414// compiled tree-sitter query against `source`. X2 will use this to
2415// post-process the raw RepoGraph definitions when an entry-point
2416// predicate fires on something that is not itself a Definition (e.g.
2417// the Python `if __name__ == "__main__"` block isn't a Definition —
2418// it's a top-level statement that anchors any function it calls).
2419//
2420// We expose it as `pub(crate)` so X2 can consume without it widening
2421// the public surface.
2422
2423#[allow(dead_code)]
2424pub(crate) fn query_match_lines(
2425 source: &str,
2426 language: &tree_sitter::Language,
2427 query: &Query,
2428) -> Vec<u32> {
2429 let mut lines = Vec::new();
2430 let Some(tree) = parse_with(source, language) else {
2431 return lines;
2432 };
2433 let mut cursor = QueryCursor::new();
2434 let mut matches = cursor.matches(query, tree.root_node(), source.as_bytes());
2435 while let Some(m) = matches.next() {
2436 for cap in m.captures {
2437 let line = u32::try_from(cap.node.start_position().row + 1).unwrap_or(u32::MAX);
2438 lines.push(line);
2439 }
2440 }
2441 lines
2442}