Skip to main content

ripvec_core/
entry_points.rs

1//! Per-language entry-point detection for the `find_dead_code` MCP tool
2//! (4.1.0).
3//!
4//! The trait [`EntryPointDetector`] and its per-language implementors
5//! ([`RustEntryDetector`], [`PythonEntryDetector`], [`GoEntryDetector`])
6//! identify the syntactic shapes that act as roots of the call graph: the
7//! BFS reachability walk for dead-code detection seeds from the union of
8//! all [`EntryPoint`]s emitted across the indexed corpus.
9//!
10//! This module is X1 of the 4.1.0 series; the actual reachability walk and
11//! cluster discovery (`RepoGraph::compute_dead_code`) lands in X2. The MCP
12//! tool wrapper lands in X3. The remaining language detectors land in X4.
13//! See `docs/FIND_DEAD_CODE_DESIGN.md` Section 2 for the per-language
14//! entry-point survey and Section 3 for the algorithm that consumes this
15//! output.
16//!
17//! ## Type B (Wired-Stub) self-audit note
18//!
19//! Until X2 lands, every public item in this module is consumed only from
20//! the integration tests under `crates/ripvec-core/tests/entry_points.rs`.
21//! `scripts/check_wiring_gaps.sh` will report these as Type B findings.
22//! The findings are **explicitly deferred** to X2 — see the Section 9
23//! PLAN.md entry — not silently dangling. Do not annotate with
24//! `#[doc(hidden)]`: the doc-visibility surface is part of the X2 contract
25//! and is the intended public API of the dead-code module.
26
27use std::path::{Path, PathBuf};
28
29use streaming_iterator::StreamingIterator;
30use tree_sitter::{Node, Parser, Query, QueryCursor};
31
/// The reason a [`Definition`](crate::repo_map::Definition)-shaped item is
/// seeded as a root of the dead-code reachability walk.
///
/// The categories mirror Section 2 of `docs/FIND_DEAD_CODE_DESIGN.md`.
/// A kind describes a single *detection*, not a definition: one `pub fn`
/// may be reported as [`EntryPointKind::Main`] (binary crates) and again as
/// [`EntryPointKind::LibraryExport`] (library crates), depending on how the
/// enclosing crate is laid out. The X2 consumer handles every detection on
/// its own.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum EntryPointKind {
    /// Program entry of a binary: Rust `fn main()`, Go `func main()`, or the
    /// Python `if __name__ == "__main__"` block.
    Main,

    /// Part of the public API surface: Rust `pub` re-exports in libraries,
    /// names listed in Python's `__all__`, capitalised names in Go
    /// libraries.
    LibraryExport,

    /// Test-harness entry: Rust `#[test]` / `#[bench]`; Python `def test_*`
    /// / `*_test.py`; Go `func TestX` / `BenchmarkX` / `ExampleX` /
    /// `FuzzX`.
    Test,

    /// Foreign-function-interface entry: Rust `#[no_mangle]` /
    /// `extern "C"`, Go cgo `//export`.
    Ffi,

    /// Rust procedural-macro entry: `#[proc_macro]`,
    /// `#[proc_macro_derive]`, `#[proc_macro_attribute]`.
    ProcMacro,

    /// Package-initialisation entry: Go's `func init()`.
    Init,

    /// Build-script entry: Cargo's `build.rs`.
    BuildScript,

    /// A method reached only through a framework-generated dispatcher that
    /// the static call graph has no edge for — for example rmcp's
    /// `#[tool(...)]` methods, whose dispatch table a procedural macro
    /// synthesises at compile time. Unless seeded explicitly, such methods
    /// look dead to BFS reachability although they are the user-facing API.
    ///
    /// Introduced in 4.1.1 (Wave 1 Front A, node A4): a live measurement
    /// against ripvec itself reported `dead_fraction = 0.986` because every
    /// `#[tool]`-annotated worker was unreachable from the call graph — see
    /// the 4.1.1 entry in `DEV_JOURNAL.md`.
    FrameworkDispatched,
}
82
83/// A single entry-point detection in one source file.
84///
85/// Per-detection, not per-definition — the same `pub fn` can produce
86/// multiple `EntryPoint` instances (one for each matching predicate).
87/// Downstream consumers should treat each detection as an independent
88/// reachability seed.
89#[derive(Debug, Clone, PartialEq, Eq)]
90pub struct EntryPoint {
91    /// The symbol name of the entry point. For Rust this is the function
92    /// item identifier; for Python it is the function or module-level
93    /// expression name; for Go it is the function declaration identifier.
94    pub name: String,
95
96    /// Why this item was treated as an entry point.
97    pub kind: EntryPointKind,
98
99    /// The source file the entry point was detected in.
100    pub file_path: PathBuf,
101
102    /// 1-based line number of the entry point declaration. Matches the
103    /// `start_line` field of [`crate::repo_map::Definition`].
104    pub line: u32,
105}
106
107/// Per-language entry-point detector.
108///
109/// Designed for consumption by `RepoGraph::compute_dead_code` in
110/// 4.1.0-X2. Until X2 lands, the only consumers are the integration tests
111/// under `crates/ripvec-core/tests/entry_points.rs` — see the
112/// module-level docstring for the Type B (Wired-Stub) self-audit note.
113///
114/// Implementations parse the source once per call. The parsing cost is
115/// trivial (tree-sitter is O(n) and the source is already in memory at
116/// detection time), and stateless parsers compose more cleanly than a
117/// shared parser cache across the three (and eventually eleven) language
118/// detectors. X2's `RepoGraph::compute_dead_code` already iterates
119/// per-file, so the per-file parse adds no additional walk cost.
120pub trait EntryPointDetector {
121    /// Return every entry point declared in this source file.
122    ///
123    /// `source` is the full UTF-8 contents of `file_path`. The path is
124    /// passed alongside `source` so detectors that consider filename
125    /// patterns (e.g. Python's `test_*.py` and `*_test.py`,
126    /// Rust's `build.rs`) can use both signals.
127    ///
128    /// If parsing fails, returns an empty vector — entry-point detection
129    /// is best-effort and should never abort the dead-code walk.
130    fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint>;
131}
132
133// ---------------------------------------------------------------------------
134// Rust detector
135// ---------------------------------------------------------------------------
136
/// Entry-point detector for Rust sources.
///
/// Reported shapes (see `docs/FIND_DEAD_CODE_DESIGN.md` Section 2 and the
/// 4.1.1 Front A widening):
/// - any function named `main`, with or without `pub` (Main) — this also
///   covers the `fn main` of `examples/*.rs` and `benches/*.rs` files (A2)
/// - top-level `pub fn` items in files under a `benches` directory, e.g.
///   Criterion's `pub fn benches()` (Main, A2)
/// - `pub fn` items in `lib.rs` / `mod.rs` (LibraryExport)
/// - top-level `pub use` re-exports in `lib.rs` / `mod.rs`
///   (LibraryExport, A1)
/// - functions annotated with `#[test]` or `#[bench]` (Test)
/// - functions annotated with `#[no_mangle]` or declared `extern "C"`
///   (Ffi)
/// - functions annotated with `#[proc_macro]`, `#[proc_macro_derive]`, or
///   `#[proc_macro_attribute]` (ProcMacro)
/// - methods annotated with `#[tool(...)]`, and every method of a
///   `#[tool_router] impl ...` block (FrameworkDispatched, A3)
/// - a file named `build.rs`, reported once as a whole-file BuildScript
///   entry at line 1 (cargo invokes the script's `main`).
///
/// The file's cargo role (lib/mod, example, bench, other) is derived from
/// its path by the private `rust_file_role` helper and decides which of
/// the path-dependent rules above apply.
#[derive(Clone, Copy, Debug, Default)]
pub struct RustEntryDetector;
160
/// The role a Rust source file plays inside a cargo workspace.
///
/// Some entry points cannot be recognised from the source text alone:
/// files under `examples/` and `benches/` are cargo-known targets even
/// though their `fn main` carries no annotation. The detector consults this
/// role to widen recognition accordingly.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum RustFileRole {
    /// A file named `lib.rs` or `mod.rs` — the crate's published interface.
    LibOrMod,
    /// A file with an `examples` directory component (cargo example binary).
    Example,
    /// A file with a `benches` directory component (cargo benchmark target).
    Bench,
    /// Every other file.
    Other,
}

/// Classify `file_path` by the cargo role it plays.
///
/// The file name is checked first, so `lib.rs` / `mod.rs` is always
/// `LibOrMod`, even below `examples/` or `benches/`. Otherwise the first
/// path component that equals `examples` or `benches` decides the role.
///
/// Cargo itself only honours the top-level `examples/` and `benches/`
/// directories of a crate; matching the component anywhere in the path is
/// deliberately looser, which is sufficient for entry-point seeding (and
/// lets synthetic test files live anywhere on disk).
fn rust_file_role(file_path: &Path) -> RustFileRole {
    let file_name = file_path.file_name().and_then(|name| name.to_str());
    if let Some("lib.rs" | "mod.rs") = file_name {
        return RustFileRole::LibOrMod;
    }

    file_path
        .components()
        .find_map(|component| match component.as_os_str().to_str()? {
            "examples" => Some(RustFileRole::Example),
            "benches" => Some(RustFileRole::Bench),
            _ => None,
        })
        .unwrap_or(RustFileRole::Other)
}
201
202impl EntryPointDetector for RustEntryDetector {
203    fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
204        let mut entries = Vec::new();
205        let Some(tree) = parse_with(source, &tree_sitter_rust::LANGUAGE.into()) else {
206            return entries;
207        };
208        let root = tree.root_node();
209        let bytes = source.as_bytes();
210
211        // Treat the entire build.rs file as a single BuildScript entry.
212        // The crate's main may be named anything inside build.rs (cargo
213        // calls the file's main), so we emit one entry at line 1.
214        if file_path.file_name().and_then(|s| s.to_str()) == Some("build.rs") {
215            entries.push(EntryPoint {
216                name: "build.rs".to_string(),
217                kind: EntryPointKind::BuildScript,
218                file_path: file_path.to_path_buf(),
219                line: 1,
220            });
221        }
222
223        let role = rust_file_role(file_path);
224
225        // A1: `pub use` re-exports in lib.rs / mod.rs surface the named
226        // items as LibraryExport entry points. Walk the top-level
227        // children — re-exports are only meaningful at module scope.
228        if role == RustFileRole::LibOrMod {
229            let mut cursor = root.walk();
230            for child in root.children(&mut cursor) {
231                if child.kind() == "use_declaration" && rust_use_is_pub(&child, bytes) {
232                    collect_rust_pub_use_entries(&child, bytes, file_path, &mut entries);
233                }
234            }
235        }
236
237        // A3: `#[tool_router] impl ...` blocks make every method inside
238        // them framework-dispatched. Walk the AST top-down looking for
239        // impl_item nodes with a preceding `#[tool_router]` attribute and
240        // emit FrameworkDispatched entries for each contained method.
241        visit_rust_tool_router_impls(&root, bytes, file_path, &mut entries);
242
243        // Walk every function_item declaration recursively. For each item:
244        //   - inspect its preceding attribute_item siblings for #[test],
245        //     #[bench], #[no_mangle], #[proc_macro*], #[tool(...)]
246        //   - inspect the function_item's own modifiers for `extern "C"`
247        //   - inspect the name for `main`
248        //   - if file is lib.rs/mod.rs and the item is `pub`, emit
249        //     LibraryExport
250        //   - if file is examples/*.rs and the item is `fn main`, emit Main
251        //   - if file is benches/*.rs and the item is `fn main` or
252        //     `pub fn benches`, emit Main
253        visit_rust_node(&root, bytes, file_path, role, &mut entries);
254        entries
255    }
256}
257
258fn visit_rust_node(
259    node: &Node<'_>,
260    bytes: &[u8],
261    file_path: &Path,
262    role: RustFileRole,
263    out: &mut Vec<EntryPoint>,
264) {
265    if node.kind() == "function_item" {
266        rust_classify_function(node, bytes, file_path, role, out);
267    }
268    let mut cursor = node.walk();
269    for child in node.children(&mut cursor) {
270        visit_rust_node(&child, bytes, file_path, role, out);
271    }
272}
273
274fn rust_classify_function(
275    node: &Node<'_>,
276    bytes: &[u8],
277    file_path: &Path,
278    role: RustFileRole,
279    out: &mut Vec<EntryPoint>,
280) {
281    // Find the function name. function_item has a `name` field whose
282    // value is an identifier child.
283    let name_node = node.child_by_field_name("name");
284    let Some(name_node) = name_node else { return };
285    let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()]) else {
286        return;
287    };
288    let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
289
290    // Gather attributes that immediately precede this function. In
291    // tree-sitter-rust the attributes are SIBLING attribute_item nodes,
292    // not children of the function_item, so we walk previous siblings.
293    let attrs = collect_preceding_rust_attrs(node, bytes);
294
295    // A single function item may match multiple predicates (e.g. a
296    // `#[no_mangle] pub extern "C" fn main` in `lib.rs` is both Ffi
297    // and Main and LibraryExport). Emit one EntryPoint per matching
298    // predicate; the BFS in X2 treats each detection as a distinct
299    // reachability seed.
300
301    // #[proc_macro], #[proc_macro_derive], #[proc_macro_attribute].
302    if attrs.iter().any(|a| {
303        a.starts_with("proc_macro_derive")
304            || a.starts_with("proc_macro_attribute")
305            || a == "proc_macro"
306            || a.starts_with("proc_macro(")
307    }) {
308        out.push(EntryPoint {
309            name: name.to_string(),
310            kind: EntryPointKind::ProcMacro,
311            file_path: file_path.to_path_buf(),
312            line,
313        });
314    }
315
316    // #[test] / #[bench].
317    if attrs.iter().any(|a| a == "test" || a == "bench") {
318        out.push(EntryPoint {
319            name: name.to_string(),
320            kind: EntryPointKind::Test,
321            file_path: file_path.to_path_buf(),
322            line,
323        });
324    }
325
326    // FFI: #[no_mangle] OR `extern "C"` in the function declaration.
327    let function_text =
328        std::str::from_utf8(&bytes[node.start_byte()..node.end_byte()]).unwrap_or("");
329    let has_extern_c =
330        rust_function_has_extern_c(node, bytes) || function_text.contains("extern \"C\"");
331    if attrs.iter().any(|a| a == "no_mangle") || has_extern_c {
332        out.push(EntryPoint {
333            name: name.to_string(),
334            kind: EntryPointKind::Ffi,
335            file_path: file_path.to_path_buf(),
336            line,
337        });
338    }
339
340    // A3: `#[tool(...)]` framework-dispatched method.
341    if attrs.iter().any(|a| a == "tool" || a.starts_with("tool(")) {
342        out.push(EntryPoint {
343            name: name.to_string(),
344            kind: EntryPointKind::FrameworkDispatched,
345            file_path: file_path.to_path_buf(),
346            line,
347        });
348    }
349
350    // Main: `fn main` (with or without `pub`).
351    if name == "main" {
352        out.push(EntryPoint {
353            name: name.to_string(),
354            kind: EntryPointKind::Main,
355            file_path: file_path.to_path_buf(),
356            line,
357        });
358    }
359
360    // LibraryExport: `pub fn` in lib.rs / mod.rs.
361    if role == RustFileRole::LibOrMod && rust_function_is_pub(node, bytes) {
362        out.push(EntryPoint {
363            name: name.to_string(),
364            kind: EntryPointKind::LibraryExport,
365            file_path: file_path.to_path_buf(),
366            line,
367        });
368    }
369
370    // A2: Criterion benches expose `pub fn benches()` invoked via the
371    // `criterion_main!` macro. Treat any top-level `pub fn` in a
372    // `benches/*.rs` file as a Main entry — cargo's bench target invokes
373    // it as a binary entry.
374    if role == RustFileRole::Bench
375        && rust_function_is_pub(node, bytes)
376        && is_top_level_in_source(node)
377    {
378        out.push(EntryPoint {
379            name: name.to_string(),
380            kind: EntryPointKind::Main,
381            file_path: file_path.to_path_buf(),
382            line,
383        });
384    }
385}
386
387/// Return true if this node sits at the source-file root (its parent
388/// chain reaches `source_file` with no intervening item-bearing scope).
389///
390/// Used to scope examples/benches `pub fn` recognition to module-level
391/// declarations only.
392fn is_top_level_in_source(node: &Node<'_>) -> bool {
393    node.parent().is_some_and(|p| p.kind() == "source_file")
394}
395
396/// Collect the text of every `#[...]` attribute node that immediately
397/// precedes this function_item in source order. The returned strings are
398/// the attribute path/identifier (e.g. `"test"`, `"no_mangle"`,
399/// `"proc_macro_derive(Foo)"`), with the leading `#[` and trailing `]`
400/// stripped, and any leading `outer_attribute_item` `#[` punctuation
401/// removed.
402fn collect_preceding_rust_attrs(node: &Node<'_>, bytes: &[u8]) -> Vec<String> {
403    let mut attrs = Vec::new();
404    let mut prev = node.prev_sibling();
405    while let Some(p) = prev {
406        if p.kind() == "attribute_item" || p.kind() == "inner_attribute_item" {
407            // The attribute_item child structure is `# [ attribute ]`;
408            // pull the `attribute` child and use its text.
409            let mut cursor = p.walk();
410            let mut attr_text: Option<String> = None;
411            for child in p.children(&mut cursor) {
412                if child.kind() == "attribute"
413                    && let Ok(text) =
414                        std::str::from_utf8(&bytes[child.start_byte()..child.end_byte()])
415                {
416                    attr_text = Some(text.to_string());
417                }
418            }
419            if let Some(t) = attr_text {
420                attrs.push(t);
421            }
422            prev = p.prev_sibling();
423        } else if p.kind().starts_with("line_comment") || p.kind().starts_with("block_comment") {
424            prev = p.prev_sibling();
425        } else {
426            break;
427        }
428    }
429    attrs
430}
431
432/// Return true if the function_item node has a `pub` visibility modifier.
433fn rust_function_is_pub(node: &Node<'_>, bytes: &[u8]) -> bool {
434    let mut cursor = node.walk();
435    for child in node.children(&mut cursor) {
436        if child.kind() == "visibility_modifier"
437            && let Ok(text) = std::str::from_utf8(&bytes[child.start_byte()..child.end_byte()])
438        {
439            return text.starts_with("pub");
440        }
441    }
442    false
443}
444
445/// Return true if a `use_declaration` node carries a `pub` visibility
446/// modifier (i.e. it is a `pub use ...;` re-export).
447fn rust_use_is_pub(node: &Node<'_>, bytes: &[u8]) -> bool {
448    let mut cursor = node.walk();
449    for child in node.children(&mut cursor) {
450        if child.kind() == "visibility_modifier"
451            && let Ok(text) = std::str::from_utf8(&bytes[child.start_byte()..child.end_byte()])
452        {
453            return text.starts_with("pub");
454        }
455    }
456    false
457}
458
459/// Collect [`EntryPoint`]s for each name re-exported by a
460/// `pub use ...;` declaration.
461///
462/// Handles the four common shapes from `docs/PLAN.md` cluster A:
463/// - `pub use ::path::to::Item;` — emit `Item` (the trailing segment).
464/// - `pub use ::path::to::*;` — emit a glob entry whose `name` is the
465///   full path (the consumer at graph-walk time fans out to every
466///   matching definition).
467/// - `pub use ::path::to::{Foo, Bar};` — emit `Foo` and `Bar`.
468/// - `pub use ::path::to::Item as Alias;` — emit `Alias` (the alias is
469///   the exported surface name).
470fn collect_rust_pub_use_entries(
471    use_decl: &Node<'_>,
472    bytes: &[u8],
473    file_path: &Path,
474    out: &mut Vec<EntryPoint>,
475) {
476    let line = u32::try_from(use_decl.start_position().row + 1).unwrap_or(u32::MAX);
477    // Find the `argument` field of the use_declaration (tree-sitter-rust
478    // names the use tree this way). Fall back to walking children if the
479    // field is missing on this grammar version.
480    let argument = use_decl.child_by_field_name("argument").or_else(|| {
481        let mut cursor = use_decl.walk();
482        let mut found: Option<Node<'_>> = None;
483        for child in use_decl.children(&mut cursor) {
484            match child.kind() {
485                "scoped_identifier" | "scoped_use_list" | "use_list" | "use_as_clause"
486                | "use_wildcard" | "identifier" => {
487                    found = Some(child);
488                    break;
489                }
490                _ => {}
491            }
492        }
493        found
494    });
495    let Some(argument) = argument else { return };
496    rust_collect_use_tree(&argument, bytes, file_path, line, out);
497}
498
499/// Recursively walk a `pub use` tree, emitting one [`EntryPoint`] per
500/// leaf name (or one glob entry per `::*`).
501fn rust_collect_use_tree(
502    node: &Node<'_>,
503    bytes: &[u8],
504    file_path: &Path,
505    line: u32,
506    out: &mut Vec<EntryPoint>,
507) {
508    match node.kind() {
509        // Wildcard: `path::*` — emit the whole path as the entry name.
510        "use_wildcard" => {
511            if let Ok(text) = std::str::from_utf8(&bytes[node.start_byte()..node.end_byte()]) {
512                let trimmed = text.trim();
513                let normalised = trimmed.replace(char::is_whitespace, "");
514                out.push(EntryPoint {
515                    name: normalised,
516                    kind: EntryPointKind::LibraryExport,
517                    file_path: file_path.to_path_buf(),
518                    line,
519                });
520            }
521        }
522        // Braced group: `path::{Foo, Bar as Baz, sub::Qux}` — recurse
523        // into each element.
524        "use_list" => {
525            let mut cursor = node.walk();
526            for child in node.children(&mut cursor) {
527                if matches!(child.kind(), "," | "{" | "}") {
528                    continue;
529                }
530                rust_collect_use_tree(&child, bytes, file_path, line, out);
531            }
532        }
533        // `path::{...}` is a `scoped_use_list`; walk into the list child.
534        "scoped_use_list" => {
535            let list = node.child_by_field_name("list").or_else(|| {
536                let mut cursor = node.walk();
537                node.children(&mut cursor).find(|c| c.kind() == "use_list")
538            });
539            if let Some(list) = list {
540                rust_collect_use_tree(&list, bytes, file_path, line, out);
541            }
542        }
543        // `path::Item as Alias` — the alias is the exported name.
544        "use_as_clause" => {
545            let alias = node.child_by_field_name("alias");
546            if let Some(alias) = alias
547                && let Ok(text) = std::str::from_utf8(&bytes[alias.start_byte()..alias.end_byte()])
548            {
549                out.push(EntryPoint {
550                    name: text.to_string(),
551                    kind: EntryPointKind::LibraryExport,
552                    file_path: file_path.to_path_buf(),
553                    line,
554                });
555            }
556        }
557        // `crate::a::b::c::Item` — the trailing identifier is the export.
558        "scoped_identifier" => {
559            let name = node.child_by_field_name("name");
560            if let Some(name) = name
561                && let Ok(text) = std::str::from_utf8(&bytes[name.start_byte()..name.end_byte()])
562            {
563                out.push(EntryPoint {
564                    name: text.to_string(),
565                    kind: EntryPointKind::LibraryExport,
566                    file_path: file_path.to_path_buf(),
567                    line,
568                });
569            }
570        }
571        // Bare `Item` (e.g. `pub use Item;`).
572        "identifier" => {
573            if let Ok(text) = std::str::from_utf8(&bytes[node.start_byte()..node.end_byte()]) {
574                out.push(EntryPoint {
575                    name: text.to_string(),
576                    kind: EntryPointKind::LibraryExport,
577                    file_path: file_path.to_path_buf(),
578                    line,
579                });
580            }
581        }
582        // Anything else (whitespace, punctuation, comments): ignore.
583        _ => {}
584    }
585}
586
587/// Walk the AST top-down looking for `#[tool_router] impl ...` blocks.
588/// For each such impl block, emit a [`EntryPointKind::FrameworkDispatched`]
589/// entry for every contained method.
590fn visit_rust_tool_router_impls(
591    node: &Node<'_>,
592    bytes: &[u8],
593    file_path: &Path,
594    out: &mut Vec<EntryPoint>,
595) {
596    if node.kind() == "impl_item" {
597        let attrs = collect_preceding_rust_attrs(node, bytes);
598        if attrs
599            .iter()
600            .any(|a| a == "tool_router" || a.starts_with("tool_router("))
601        {
602            // Walk the impl's body, emitting an entry per function_item.
603            if let Some(body) = node.child_by_field_name("body") {
604                let mut cursor = body.walk();
605                for child in body.children(&mut cursor) {
606                    if child.kind() != "function_item" {
607                        continue;
608                    }
609                    let Some(name_node) = child.child_by_field_name("name") else {
610                        continue;
611                    };
612                    let Ok(name) =
613                        std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()])
614                    else {
615                        continue;
616                    };
617                    let line = u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
618                    out.push(EntryPoint {
619                        name: name.to_string(),
620                        kind: EntryPointKind::FrameworkDispatched,
621                        file_path: file_path.to_path_buf(),
622                        line,
623                    });
624                }
625            }
626        }
627    }
628    let mut cursor = node.walk();
629    for child in node.children(&mut cursor) {
630        visit_rust_tool_router_impls(&child, bytes, file_path, out);
631    }
632}
633
634/// Return true if the function_item has an `extern "C"` ABI declaration
635/// as a function-modifier child (e.g. `pub extern "C" fn bar()`).
636fn rust_function_has_extern_c(node: &Node<'_>, bytes: &[u8]) -> bool {
637    let mut cursor = node.walk();
638    for child in node.children(&mut cursor) {
639        // tree-sitter-rust uses `function_modifiers` containing
640        // `extern_modifier`; the latter's child is a `string_literal`
641        // with the ABI name.
642        if child.kind() != "function_modifiers" {
643            continue;
644        }
645        let mut inner = child.walk();
646        for grandchild in child.children(&mut inner) {
647            if grandchild.kind() == "extern_modifier"
648                && let Ok(text) =
649                    std::str::from_utf8(&bytes[grandchild.start_byte()..grandchild.end_byte()])
650                && text.contains("\"C\"")
651            {
652                return true;
653            }
654        }
655    }
656    false
657}
658
659// ---------------------------------------------------------------------------
660// Python detector
661// ---------------------------------------------------------------------------
662
/// Entry-point detector for Python sources.
///
/// Reported shapes (see `docs/FIND_DEAD_CODE_DESIGN.md` Section 2 and the
/// Cycle 9 B-0007 widening):
/// - a module-level `if __name__ == "__main__":` block (Main)
/// - functions called directly inside that block, such as `main()` or
///   `cli()` (Main) — this seeds the real callable as a BFS root
/// - top-level functions decorated with `@click.command()`,
///   `@typer.command()`, or any other `@X.command()` form (Main)
/// - top-level functions listed in `__all__` (LibraryExport)
/// - functions whose name starts with `test_`, in files named
///   `test_*.py` / `*_test.py` or located under a `tests/` directory
///   (Test)
#[derive(Clone, Copy, Debug, Default)]
pub struct PythonEntryDetector;
677
678impl EntryPointDetector for PythonEntryDetector {
679    #[expect(
680        clippy::too_many_lines,
681        reason = "two-pass detection: first-pass collects top-level fn names and CLI decorators, second-pass emits entries; helper functions keep individual pieces readable"
682    )]
683    fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
684        let mut entries = Vec::new();
685        let Some(tree) = parse_with(source, &tree_sitter_python::LANGUAGE.into()) else {
686            return entries;
687        };
688        let root = tree.root_node();
689        let bytes = source.as_bytes();
690
691        let is_test_file = python_is_test_file(file_path);
692
693        // First pass: collect top-level function names and CLI decorators.
694        let mut toplevel_fns: Vec<(String, u32)> = Vec::new();
695        let mut click_decorated: Vec<(String, u32)> = Vec::new();
696        {
697            let mut cursor = root.walk();
698            for child in root.children(&mut cursor) {
699                match child.kind() {
700                    "function_definition" => {
701                        if let Some(name_node) = child.child_by_field_name("name")
702                            && let Ok(name) = std::str::from_utf8(
703                                &bytes[name_node.start_byte()..name_node.end_byte()],
704                            )
705                        {
706                            let line =
707                                u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
708                            toplevel_fns.push((name.to_string(), line));
709                        }
710                    }
711                    "decorated_definition" => {
712                        if let Some(fn_node) = child.child_by_field_name("definition")
713                            && fn_node.kind() == "function_definition"
714                            && let Some(name_node) = fn_node.child_by_field_name("name")
715                            && let Ok(name) = std::str::from_utf8(
716                                &bytes[name_node.start_byte()..name_node.end_byte()],
717                            )
718                        {
719                            let line =
720                                u32::try_from(fn_node.start_position().row + 1).unwrap_or(u32::MAX);
721                            toplevel_fns.push((name.to_string(), line));
722                            if python_has_cli_command_decorator(&child, bytes) {
723                                click_decorated.push((name.to_string(), line));
724                            }
725                        }
726                    }
727                    _ => {}
728                }
729            }
730        }
731
732        // Second pass: emit entry points.
733        let mut cursor = root.walk();
734        for child in root.children(&mut cursor) {
735            match child.kind() {
736                "if_statement" if python_is_dunder_main_block(&child, bytes) => {
737                    let line = u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
738                    entries.push(EntryPoint {
739                        name: "__main__".to_string(),
740                        kind: EntryPointKind::Main,
741                        file_path: file_path.to_path_buf(),
742                        line,
743                    });
744                    // Also emit functions called directly in the block body.
745                    // tree-sitter-python uses "consequence" for the block,
746                    // not "body".
747                    if let Some(body) = child.child_by_field_name("consequence") {
748                        for called in python_direct_calls_in_block(&body, bytes) {
749                            let fn_line = toplevel_fns
750                                .iter()
751                                .find(|(n, _)| n == &called)
752                                .map(|(_, l)| *l)
753                                .unwrap_or(line);
754                            entries.push(EntryPoint {
755                                name: called,
756                                kind: EntryPointKind::Main,
757                                file_path: file_path.to_path_buf(),
758                                line: fn_line,
759                            });
760                        }
761                    }
762                }
763                "expression_statement" => {
764                    // `__all__ = [...]` is an expression_statement
765                    // containing an assignment.
766                    if let Some(names) = python_extract_dunder_all(&child, bytes) {
767                        let line =
768                            u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
769                        for n in names {
770                            entries.push(EntryPoint {
771                                name: n,
772                                kind: EntryPointKind::LibraryExport,
773                                file_path: file_path.to_path_buf(),
774                                line,
775                            });
776                        }
777                    }
778                }
779                "function_definition" | "decorated_definition" => {
780                    let fn_node = if child.kind() == "decorated_definition" {
781                        child.child_by_field_name("definition")
782                    } else {
783                        Some(child)
784                    };
785                    if let Some(fn_node) = fn_node
786                        && fn_node.kind() == "function_definition"
787                        && let Some(name_node) = fn_node.child_by_field_name("name")
788                        && let Ok(name) = std::str::from_utf8(
789                            &bytes[name_node.start_byte()..name_node.end_byte()],
790                        )
791                        && is_test_file
792                        && name.starts_with("test_")
793                    {
794                        let line =
795                            u32::try_from(fn_node.start_position().row + 1).unwrap_or(u32::MAX);
796                        entries.push(EntryPoint {
797                            name: name.to_string(),
798                            kind: EntryPointKind::Test,
799                            file_path: file_path.to_path_buf(),
800                            line,
801                        });
802                    }
803                }
804                _ => {}
805            }
806        }
807
808        // Emit @click.command() / @typer.command() decorated functions.
809        for (name, line) in click_decorated {
810            entries.push(EntryPoint {
811                name,
812                kind: EntryPointKind::Main,
813                file_path: file_path.to_path_buf(),
814                line,
815            });
816        }
817
818        entries
819    }
820}
821
/// Decide whether `file_path` names a Python test module.
///
/// A path qualifies when its final component carries a `.py` extension
/// (compared ASCII case-insensitively) and either
/// - the file stem starts with `test_` or ends with `_test`, or
/// - some component of the path is exactly `tests`.
///
/// Paths without a final component, or whose final component is not valid
/// UTF-8, are never treated as test files.
fn python_is_test_file(file_path: &Path) -> bool {
    let Some(file_name) = file_path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };
    let leaf = Path::new(file_name);

    let has_py_extension = leaf
        .extension()
        .is_some_and(|ext| ext.eq_ignore_ascii_case("py"));
    if !has_py_extension {
        return false;
    }

    let stem = leaf.file_stem().and_then(|s| s.to_str()).unwrap_or("");
    let named_like_test = stem.starts_with("test_") || stem.ends_with("_test");

    // Fall back to the directory convention: anything below a `tests`
    // directory counts as a test module.
    named_like_test
        || file_path
            .components()
            .any(|part| part.as_os_str() == std::ffi::OsStr::new("tests"))
}
844
845fn python_is_dunder_main_block(node: &Node<'_>, bytes: &[u8]) -> bool {
846    // if condition: comparison `__name__ == "__main__"`.
847    let cond = node.child_by_field_name("condition");
848    let Some(cond) = cond else { return false };
849    let Ok(text) = std::str::from_utf8(&bytes[cond.start_byte()..cond.end_byte()]) else {
850        return false;
851    };
852    // Tolerate single or double quotes around `__main__`.
853    let normalized = text.replace(' ', "");
854    normalized.contains("__name__==\"__main__\"")
855        || normalized.contains("__name__=='__main__'")
856        || normalized.contains("\"__main__\"==__name__")
857        || normalized.contains("'__main__'==__name__")
858}
859
860/// Extract the string literals from a top-level `__all__ = [...]`
861/// assignment. Returns `None` if the statement is not such an assignment.
862fn python_extract_dunder_all(node: &Node<'_>, bytes: &[u8]) -> Option<Vec<String>> {
863    // expression_statement -> assignment (left, right)
864    let mut cursor = node.walk();
865    for child in node.children(&mut cursor) {
866        if child.kind() == "assignment" {
867            let left = child.child_by_field_name("left")?;
868            let right = child.child_by_field_name("right")?;
869            let left_text = std::str::from_utf8(&bytes[left.start_byte()..left.end_byte()]).ok()?;
870            if left_text.trim() != "__all__" {
871                return None;
872            }
873            // right is typically a `list` or `tuple` node containing
874            // `string` children.
875            let mut names = Vec::new();
876            let mut inner = right.walk();
877            for grandchild in right.children(&mut inner) {
878                if grandchild.kind() != "string" {
879                    continue;
880                }
881                // Walk the string node to find string_content child.
882                let mut sc = grandchild.walk();
883                let mut content_text: Option<String> = None;
884                for sg in grandchild.children(&mut sc) {
885                    if sg.kind() == "string_content"
886                        && let Ok(t) = std::str::from_utf8(&bytes[sg.start_byte()..sg.end_byte()])
887                    {
888                        content_text = Some(t.to_string());
889                    }
890                }
891                if let Some(t) = content_text {
892                    names.push(t);
893                } else if let Ok(raw) =
894                    std::str::from_utf8(&bytes[grandchild.start_byte()..grandchild.end_byte()])
895                {
896                    // Fallback: strip the outer quotes from the raw
897                    // string text.
898                    let trimmed = raw.trim_matches(|c| c == '"' || c == '\'');
899                    names.push(trimmed.to_string());
900                }
901            }
902            return Some(names);
903        }
904    }
905    None
906}
907
908/// Return the names of functions called as bare `name()` (no attribute
909/// access) in the immediate statement children of a Python block node.
910///
911/// Used to extract the callable(s) invoked from an
912/// `if __name__ == "__main__":` block body — typically a single call like
913/// `main()` or `cli()`.  Restricted to top-level
914/// `expression_statement → call → identifier` to avoid over-seeding.
915fn python_direct_calls_in_block(block: &Node<'_>, bytes: &[u8]) -> Vec<String> {
916    let mut names = Vec::new();
917    let mut cursor = block.walk();
918    for stmt in block.children(&mut cursor) {
919        if stmt.kind() != "expression_statement" {
920            continue;
921        }
922        let mut sc = stmt.walk();
923        for expr in stmt.children(&mut sc) {
924            if expr.kind() != "call" {
925                continue;
926            }
927            if let Some(func_node) = expr.child_by_field_name("function")
928                && func_node.kind() == "identifier"
929                && let Ok(name) =
930                    std::str::from_utf8(&bytes[func_node.start_byte()..func_node.end_byte()])
931                && !name.is_empty()
932            {
933                names.push(name.to_string());
934            }
935        }
936    }
937    names
938}
939
940/// Return true if a `decorated_definition` node carries a CLI command
941/// decorator matching `@X.command()` or `@X.command` (where X is any
942/// module name such as `click`, `typer`, `app`, `cli`, etc.).
943///
944/// Only `.command` attribute access is matched — this covers all major
945/// Python CLI frameworks conservatively.
946fn python_has_cli_command_decorator(decorated_def: &Node<'_>, bytes: &[u8]) -> bool {
947    let mut cursor = decorated_def.walk();
948    for child in decorated_def.children(&mut cursor) {
949        if child.kind() != "decorator" {
950            continue;
951        }
952        let mut dc = child.walk();
953        for inner in child.children(&mut dc) {
954            match inner.kind() {
955                "call" => {
956                    if let Some(func) = inner.child_by_field_name("function")
957                        && func.kind() == "attribute"
958                        && let Some(prop) = func.child_by_field_name("attribute")
959                        && let Ok(prop_text) =
960                            std::str::from_utf8(&bytes[prop.start_byte()..prop.end_byte()])
961                        && prop_text == "command"
962                    {
963                        return true;
964                    }
965                }
966                "attribute" => {
967                    if let Some(prop) = inner.child_by_field_name("attribute")
968                        && let Ok(prop_text) =
969                            std::str::from_utf8(&bytes[prop.start_byte()..prop.end_byte()])
970                        && prop_text == "command"
971                    {
972                        return true;
973                    }
974                }
975                _ => {}
976            }
977        }
978    }
979    false
980}
981
982// ---------------------------------------------------------------------------
983// Go detector
984// ---------------------------------------------------------------------------
985
/// Entry-point detector for Go sources.
///
/// Following Section 2 of `docs/FIND_DEAD_CODE_DESIGN.md`, these top-level
/// declarations are reported as call-graph roots:
/// - `func main()` when the file declares `package main` → Main
/// - `func init()` in any package → Init (runs automatically at package
///   load)
/// - functions named with a `Test`, `Benchmark`, `Example`, or `Fuzz`
///   prefix → Test
/// - exported (uppercase-initial) functions and methods in non-`main`
///   packages → LibraryExport
#[derive(Clone, Copy, Debug, Default)]
pub struct GoEntryDetector;
996
997impl EntryPointDetector for GoEntryDetector {
998    fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
999        let mut entries = Vec::new();
1000        let Some(tree) = parse_with(source, &tree_sitter_go::LANGUAGE.into()) else {
1001            return entries;
1002        };
1003        let root = tree.root_node();
1004        let bytes = source.as_bytes();
1005
1006        // Determine the package name. `package main` enables `main` as
1007        // the binary entry; non-main packages are libraries whose
1008        // exported names are library entries.
1009        let package_name = go_package_name(&root, bytes).unwrap_or_default();
1010        let is_main_package = package_name == "main";
1011
1012        // Walk top-level function_declaration / method_declaration nodes.
1013        let mut cursor = root.walk();
1014        for child in root.children(&mut cursor) {
1015            match child.kind() {
1016                "function_declaration" => {
1017                    if let Some(name_node) = child.child_by_field_name("name")
1018                        && let Ok(name) = std::str::from_utf8(
1019                            &bytes[name_node.start_byte()..name_node.end_byte()],
1020                        )
1021                    {
1022                        let line =
1023                            u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
1024                        go_classify(name, line, is_main_package, file_path, &mut entries);
1025                    }
1026                }
1027                "method_declaration" => {
1028                    // Methods participate in LibraryExport only — main / init
1029                    // / Test* are exclusively free functions.
1030                    if let Some(name_node) = child.child_by_field_name("name")
1031                        && let Ok(name) = std::str::from_utf8(
1032                            &bytes[name_node.start_byte()..name_node.end_byte()],
1033                        )
1034                        && !is_main_package
1035                        && go_is_exported(name)
1036                    {
1037                        let line =
1038                            u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
1039                        entries.push(EntryPoint {
1040                            name: name.to_string(),
1041                            kind: EntryPointKind::LibraryExport,
1042                            file_path: file_path.to_path_buf(),
1043                            line,
1044                        });
1045                    }
1046                }
1047                _ => {}
1048            }
1049        }
1050
1051        entries
1052    }
1053}
1054
1055fn go_package_name(root: &Node<'_>, bytes: &[u8]) -> Option<String> {
1056    let mut cursor = root.walk();
1057    for child in root.children(&mut cursor) {
1058        if child.kind() != "package_clause" {
1059            continue;
1060        }
1061        let mut inner = child.walk();
1062        for grandchild in child.children(&mut inner) {
1063            if grandchild.kind() == "package_identifier"
1064                && let Ok(text) =
1065                    std::str::from_utf8(&bytes[grandchild.start_byte()..grandchild.end_byte()])
1066            {
1067                return Some(text.to_string());
1068            }
1069        }
1070    }
1071    None
1072}
1073
1074fn go_classify(
1075    name: &str,
1076    line: u32,
1077    is_main_package: bool,
1078    file_path: &Path,
1079    out: &mut Vec<EntryPoint>,
1080) {
1081    if name == "main" && is_main_package {
1082        out.push(EntryPoint {
1083            name: name.to_string(),
1084            kind: EntryPointKind::Main,
1085            file_path: file_path.to_path_buf(),
1086            line,
1087        });
1088        return;
1089    }
1090    if name == "init" {
1091        out.push(EntryPoint {
1092            name: name.to_string(),
1093            kind: EntryPointKind::Init,
1094            file_path: file_path.to_path_buf(),
1095            line,
1096        });
1097        return;
1098    }
1099    if name.starts_with("Test")
1100        || name.starts_with("Benchmark")
1101        || name.starts_with("Example")
1102        || name.starts_with("Fuzz")
1103    {
1104        out.push(EntryPoint {
1105            name: name.to_string(),
1106            kind: EntryPointKind::Test,
1107            file_path: file_path.to_path_buf(),
1108            line,
1109        });
1110        return;
1111    }
1112    if !is_main_package && go_is_exported(name) {
1113        out.push(EntryPoint {
1114            name: name.to_string(),
1115            kind: EntryPointKind::LibraryExport,
1116            file_path: file_path.to_path_buf(),
1117            line,
1118        });
1119    }
1120}
1121
/// Return true if `name` starts with an uppercase letter, which is Go's
/// syntactic rule for an exported (package-public) identifier.
///
/// The Go specification defines "uppercase" in Unicode terms, so
/// identifiers such as `Äpfel` or `Ωmega` are exported just like ASCII
/// ones. An empty name, or one starting with `_`, a digit, or a lowercase
/// letter, is not exported.
fn go_is_exported(name: &str) -> bool {
    name.chars().next().is_some_and(char::is_uppercase)
}
1127
1128// ---------------------------------------------------------------------------
1129// C detector
1130// ---------------------------------------------------------------------------
1131
/// Entry-point detector for C sources (I#73, Cycle 7 / 4.1.4).
///
/// Reports the following as entry points:
/// - Every top-level `function_definition` whose declarator resolves to the
///   identifier `main`, whatever its return type or parameter list:
///   `int main()`, `int main(int argc, char **argv)`, and even the rare
///   pointer-returning `int *main(void)` → [`EntryPointKind::Main`].
/// - Functions annotated with `__attribute__((constructor))` →
///   [`EntryPointKind::Init`]. These run before `main` via ELF
///   `.init_array`, so they are BFS roots.
/// - cgo-style `//export NAME` line comments → [`EntryPointKind::Ffi`].
///   Cgo emits these above C wrapper stubs for Go functions exported to C.
///
/// No C pre-processing is performed. Macros that expand to `main` or to
/// `__attribute__((constructor))` are therefore not detected, which is an
/// accepted limitation at this stage.
#[derive(Clone, Copy, Debug, Default)]
pub struct CEntryDetector;
1149
1150impl EntryPointDetector for CEntryDetector {
1151    fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
1152        let mut entries = Vec::new();
1153        let Some(tree) = parse_with(source, &tree_sitter_c::LANGUAGE.into()) else {
1154            return entries;
1155        };
1156        let root = tree.root_node();
1157        let bytes = source.as_bytes();
1158
1159        // Collect cgo `//export NAME` comments first — they are not tied to
1160        // any AST function node; we scan the source text directly.
1161        c_collect_cgo_exports(source, file_path, &mut entries);
1162
1163        // Walk top-level declarations.
1164        let mut cursor = root.walk();
1165        for child in root.children(&mut cursor) {
1166            if child.kind() == "function_definition" {
1167                c_classify_function(&child, bytes, file_path, &mut entries);
1168            }
1169        }
1170
1171        entries
1172    }
1173}
1174
1175/// Classify a C `function_definition` node and emit entry points as
1176/// appropriate.
1177///
1178/// Handles three cases:
1179/// 1. Declarator is a `function_declarator` directly — covers `int main(…)`.
1180/// 2. Declarator is a `pointer_declarator` wrapping a `function_declarator`
1181///    — covers `int *main(void)`.
1182/// 3. The function has an `__attribute__((constructor))` specifier — emit
1183///    an additional [`EntryPointKind::Init`] entry.
1184fn c_classify_function(
1185    node: &tree_sitter::Node<'_>,
1186    bytes: &[u8],
1187    file_path: &Path,
1188    out: &mut Vec<EntryPoint>,
1189) {
1190    let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
1191
1192    // Resolve the innermost identifier name from the declarator.
1193    let Some(declarator) = node.child_by_field_name("declarator") else {
1194        return;
1195    };
1196    let Some(name) = c_resolve_function_name(&declarator, bytes) else {
1197        return;
1198    };
1199
1200    // Check for `__attribute__((constructor))` — the attribute_specifier
1201    // appears as a direct child of the function_definition (before the
1202    // type or declarator fields).
1203    let has_constructor_attr = c_has_constructor_attribute(node, bytes);
1204
1205    if name == "main" {
1206        out.push(EntryPoint {
1207            name: name.clone(),
1208            kind: EntryPointKind::Main,
1209            file_path: file_path.to_path_buf(),
1210            line,
1211        });
1212    }
1213
1214    if has_constructor_attr {
1215        out.push(EntryPoint {
1216            name,
1217            kind: EntryPointKind::Init,
1218            file_path: file_path.to_path_buf(),
1219            line,
1220        });
1221    }
1222}
1223
1224/// Walk a C declarator node to extract the innermost `identifier` that
1225/// names the function. Handles:
1226/// - `function_declarator` with `declarator: identifier` (direct case)
1227/// - `pointer_declarator` → `function_declarator` → `identifier` (pointer
1228///   return type)
1229///
1230/// Returns `None` if the name cannot be resolved (e.g., anonymous
1231/// declarations or grammar variants not covered here).
1232fn c_resolve_function_name(declarator: &tree_sitter::Node<'_>, bytes: &[u8]) -> Option<String> {
1233    match declarator.kind() {
1234        "function_declarator" => {
1235            // The inner declarator field holds the name.
1236            let inner = declarator.child_by_field_name("declarator")?;
1237            c_resolve_function_name(&inner, bytes)
1238        }
1239        "pointer_declarator" => {
1240            // Recurse: pointer_declarator wraps another declarator.
1241            let inner = declarator.child_by_field_name("declarator")?;
1242            c_resolve_function_name(&inner, bytes)
1243        }
1244        "identifier" => {
1245            let text =
1246                std::str::from_utf8(&bytes[declarator.start_byte()..declarator.end_byte()]).ok()?;
1247            Some(text.to_string())
1248        }
1249        _ => None,
1250    }
1251}
1252
1253/// Return true if the `function_definition` node has an
1254/// `__attribute__((constructor))` specifier as a direct child.
1255///
1256/// In tree-sitter-c the attribute appears as an `attribute_specifier`
1257/// child of `function_definition` (before the `type` field). The
1258/// `attribute_specifier` contains an `argument_list` whose first
1259/// element is an `identifier` with text `"constructor"`.
1260fn c_has_constructor_attribute(node: &tree_sitter::Node<'_>, bytes: &[u8]) -> bool {
1261    let mut cursor = node.walk();
1262    for child in node.children(&mut cursor) {
1263        if child.kind() != "attribute_specifier" {
1264            continue;
1265        }
1266        // Look for an argument_list child containing "constructor".
1267        let mut inner = child.walk();
1268        for grandchild in child.children(&mut inner) {
1269            if grandchild.kind() != "argument_list" {
1270                continue;
1271            }
1272            let mut arg_cur = grandchild.walk();
1273            for arg in grandchild.children(&mut arg_cur) {
1274                if arg.kind() == "identifier"
1275                    && std::str::from_utf8(&bytes[arg.start_byte()..arg.end_byte()])
1276                        .is_ok_and(|t| t == "constructor")
1277                {
1278                    return true;
1279                }
1280            }
1281        }
1282    }
1283    false
1284}
1285
1286/// Scan the raw source for cgo-style `//export NAME` line comments and
1287/// emit [`EntryPointKind::Ffi`] entries for each exported name found.
1288///
1289/// cgo inserts these above the C stub for each Go function exported to C.
1290/// They are not associated with any AST node, so we scan the source text
1291/// directly. The format is exactly `//export <ident>` (no space after `//`).
1292fn c_collect_cgo_exports(source: &str, file_path: &Path, out: &mut Vec<EntryPoint>) {
1293    for (i, line) in source.lines().enumerate() {
1294        let trimmed = line.trim();
1295        if let Some(rest) = trimmed.strip_prefix("//export ") {
1296            let name = rest.trim();
1297            if !name.is_empty() && name.chars().all(|c| c.is_alphanumeric() || c == '_') {
1298                let line_num = u32::try_from(i + 1).unwrap_or(u32::MAX);
1299                out.push(EntryPoint {
1300                    name: name.to_string(),
1301                    kind: EntryPointKind::Ffi,
1302                    file_path: file_path.to_path_buf(),
1303                    line: line_num,
1304                });
1305            }
1306        }
1307    }
1308}
1309
1310// ---------------------------------------------------------------------------
1311// JavaScript / TypeScript detector
1312// ---------------------------------------------------------------------------
1313
/// Entry-point detector for JavaScript and TypeScript sources (I#70,
/// Cycle 7 / 4.1.4).
///
/// Recognised patterns:
/// - `export default function NAME(…)` → [`EntryPointKind::LibraryExport`]
/// - `export default class NAME` → [`EntryPointKind::LibraryExport`]
/// - `export const NAME = (…) => …` (named arrow export) →
///   [`EntryPointKind::LibraryExport`]
/// - `module.exports = …` → [`EntryPointKind::LibraryExport`] (named
///   `module.exports`)
/// - `exports.NAME = …` → [`EntryPointKind::LibraryExport`]
/// - `test(…)`, `it(…)`, `describe(…)` calls in `*.test.js` /
///   `*.spec.js` / `*.test.ts` / `*.spec.ts` files →
///   [`EntryPointKind::Test`]
///
/// A CommonJS alias such as `var app = exports = module.exports = {}` is
/// looked up before classification so that later `app.METHOD = …`
/// assignments can be recognised as exports.
///
/// All JS/TS sources are parsed with `tree-sitter-javascript`.
/// TypeScript-specific syntax (type annotations, decorators) is handled
/// gracefully because tree-sitter-javascript degrades cleanly on TS.
#[derive(Clone, Copy, Debug, Default)]
pub struct JsEntryDetector;
1333
1334impl EntryPointDetector for JsEntryDetector {
1335    fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
1336        let mut entries = Vec::new();
1337        let Some(tree) = parse_with(source, &tree_sitter_javascript::LANGUAGE.into()) else {
1338            return entries;
1339        };
1340        let root = tree.root_node();
1341        let bytes = source.as_bytes();
1342
1343        let is_test_file = js_is_test_file(file_path);
1344
1345        // First pass: detect the CommonJS object-accumulation alias
1346        // `var app = exports = module.exports = {}` so the second pass
1347        // can recognise `app.METHOD = function...` as LibraryExport.
1348        let module_exports_alias = js_find_module_exports_alias(&root, bytes);
1349
1350        let mut cursor = root.walk();
1351        for child in root.children(&mut cursor) {
1352            match child.kind() {
1353                "export_statement" => {
1354                    js_classify_export(&child, bytes, file_path, &mut entries);
1355                }
1356                "expression_statement" => {
1357                    js_classify_expression_statement(
1358                        &child,
1359                        bytes,
1360                        file_path,
1361                        is_test_file,
1362                        module_exports_alias.as_deref(),
1363                        &mut entries,
1364                    );
1365                }
1366                _ => {}
1367            }
1368        }
1369
1370        entries
1371    }
1372}
1373
/// Decide whether `file_path` points at a JS/TS test file.
///
/// A file qualifies when its name, compared case-insensitively, contains
/// `.test.` or `.spec.` (for example `foo.test.js`, `foo.spec.tsx`), or
/// when any component of the path is exactly `__tests__`.
///
/// Paths without a final component, or whose final component is not valid
/// UTF-8, are never treated as test files.
fn js_is_test_file(file_path: &Path) -> bool {
    let Some(file_name) = file_path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };

    // The infix must sit in front of a further extension, so a file that is
    // merely called `test.js` does not qualify.
    let lowered_name = file_name.to_ascii_lowercase();
    let has_test_infix = [".test.", ".spec."]
        .iter()
        .any(|&infix| lowered_name.contains(infix));

    has_test_infix
        || file_path
            .components()
            .any(|part| part.as_os_str() == std::ffi::OsStr::new("__tests__"))
}
1393
1394/// Scan the top-level AST for the CommonJS object-accumulation pattern
1395/// `var ALIAS = exports = module.exports = {}` and return the local
1396/// variable name (`ALIAS`) if found.
1397///
1398/// The pattern appears in express's `lib/application.js`:
1399/// ```text
1400/// var app = exports = module.exports = {};
1401/// ```
1402/// Subsequent `app.use = function...`, `app.handle = function...` etc.
1403/// are all library exports whose reachability depends on knowing `app` is
1404/// the module.exports alias.
1405///
1406/// Returns the first alias found; returns `None` when the pattern is absent.
1407fn js_find_module_exports_alias(root: &tree_sitter::Node<'_>, bytes: &[u8]) -> Option<String> {
1408    let mut cursor = root.walk();
1409    for child in root.children(&mut cursor) {
1410        if child.kind() != "variable_declaration" {
1411            continue;
1412        }
1413        let mut vc = child.walk();
1414        for decl in child.children(&mut vc) {
1415            if decl.kind() != "variable_declarator" {
1416                continue;
1417            }
1418            let Some(name_node) = decl.child_by_field_name("name") else {
1419                continue;
1420            };
1421            let Ok(alias) =
1422                std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()])
1423            else {
1424                continue;
1425            };
1426            let Some(value) = decl.child_by_field_name("value") else {
1427                continue;
1428            };
1429            if js_assignment_reaches_module_exports(&value, bytes) {
1430                return Some(alias.to_string());
1431            }
1432        }
1433    }
1434    None
1435}
1436
1437/// Return true if `node` is an `assignment_expression` (possibly nested)
1438/// that has `module.exports` as one of its left-hand sides.
1439///
1440/// Handles both direct and chained forms:
1441/// - `module.exports = {}`
1442/// - `exports = module.exports = {}`
1443fn js_assignment_reaches_module_exports(node: &tree_sitter::Node<'_>, bytes: &[u8]) -> bool {
1444    if node.kind() != "assignment_expression" {
1445        return false;
1446    }
1447    if let Some(left) = node.child_by_field_name("left")
1448        && left.kind() == "member_expression"
1449        && let (Some(obj), Some(prop)) = (
1450            left.child_by_field_name("object"),
1451            left.child_by_field_name("property"),
1452        )
1453    {
1454        let obj_text = std::str::from_utf8(&bytes[obj.start_byte()..obj.end_byte()]).unwrap_or("");
1455        let prop_text =
1456            std::str::from_utf8(&bytes[prop.start_byte()..prop.end_byte()]).unwrap_or("");
1457        if obj_text == "module" && prop_text == "exports" {
1458            return true;
1459        }
1460    }
1461    // Recurse into RHS for chained assignments.
1462    node.child_by_field_name("right")
1463        .is_some_and(|right| js_assignment_reaches_module_exports(&right, bytes))
1464}
1465
1466/// Classify a top-level `export_statement` node and emit entry points.
1467///
1468/// Handles the patterns:
1469/// - `export default function NAME(…)` / `export default class NAME`
1470/// - `export const NAME = (…) => …` (named arrow-function export)
1471fn js_classify_export(
1472    node: &tree_sitter::Node<'_>,
1473    bytes: &[u8],
1474    file_path: &Path,
1475    out: &mut Vec<EntryPoint>,
1476) {
1477    let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
1478
1479    // Walk children to find the `declaration` field (or the `default`
1480    // keyword plus inline declaration). tree-sitter-javascript uses the
1481    // `declaration` field for named exports and places the value inline
1482    // after `default` for default exports.
1483    let mut cursor = node.walk();
1484    for child in node.children(&mut cursor) {
1485        match child.kind() {
1486            "function_declaration" => {
1487                // `export default function NAME(…)` or `export function NAME(…)`.
1488                if let Some(name_node) = child.child_by_field_name("name")
1489                    && let Ok(name) =
1490                        std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()])
1491                {
1492                    out.push(EntryPoint {
1493                        name: name.to_string(),
1494                        kind: EntryPointKind::LibraryExport,
1495                        file_path: file_path.to_path_buf(),
1496                        line,
1497                    });
1498                }
1499            }
1500            "class_declaration" => {
1501                // `export default class NAME` or `export class NAME`.
1502                if let Some(name_node) = child.child_by_field_name("name")
1503                    && let Ok(name) =
1504                        std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()])
1505                {
1506                    out.push(EntryPoint {
1507                        name: name.to_string(),
1508                        kind: EntryPointKind::LibraryExport,
1509                        file_path: file_path.to_path_buf(),
1510                        line,
1511                    });
1512                }
1513            }
1514            "lexical_declaration" => {
1515                // `export const NAME = (…) => …` — walk variable declarators.
1516                js_collect_lexical_exports(&child, bytes, file_path, line, out);
1517            }
1518            _ => {}
1519        }
1520    }
1521}
1522
1523/// Walk a `lexical_declaration` (const/let) inside an export statement
1524/// and emit LibraryExport entries for each variable whose value is an
1525/// arrow function or function expression.
1526fn js_collect_lexical_exports(
1527    node: &tree_sitter::Node<'_>,
1528    bytes: &[u8],
1529    file_path: &Path,
1530    line: u32,
1531    out: &mut Vec<EntryPoint>,
1532) {
1533    let mut cursor = node.walk();
1534    for child in node.children(&mut cursor) {
1535        if child.kind() != "variable_declarator" {
1536            continue;
1537        }
1538        let Some(name_node) = child.child_by_field_name("name") else {
1539            continue;
1540        };
1541        let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()])
1542        else {
1543            continue;
1544        };
1545        // Only emit if the RHS is a function-like value (arrow or
1546        // function expression). Non-function exports (e.g. string
1547        // constants) are not entry points.
1548        if child
1549            .child_by_field_name("value")
1550            .is_some_and(|v| matches!(v.kind(), "arrow_function" | "function_expression"))
1551        {
1552            out.push(EntryPoint {
1553                name: name.to_string(),
1554                kind: EntryPointKind::LibraryExport,
1555                file_path: file_path.to_path_buf(),
1556                line,
1557            });
1558        }
1559    }
1560}
1561
1562/// Classify a top-level `expression_statement` node.
1563///
1564/// Handles:
1565/// - `module.exports = …` — emits a LibraryExport entry named `module.exports`.
1566/// - `exports.NAME = …` — emits a LibraryExport entry named `NAME`.
1567/// - `exports = module.exports = VALUE` — chained assignment; emits
1568///   LibraryExport for the value name when it is an identifier.
1569/// - `ALIAS.METHOD = function...` — emits LibraryExport named `METHOD`
1570///   when `ALIAS` is the known `module.exports` alias (B-0010).
1571/// - `test(…)` / `it(…)` / `describe(…)` — emits a Test entry when in a
1572///   test file.
1573fn js_classify_expression_statement(
1574    node: &tree_sitter::Node<'_>,
1575    bytes: &[u8],
1576    file_path: &Path,
1577    is_test_file: bool,
1578    module_exports_alias: Option<&str>,
1579    out: &mut Vec<EntryPoint>,
1580) {
1581    let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
1582
1583    let mut cursor = node.walk();
1584    for child in node.children(&mut cursor) {
1585        match child.kind() {
1586            "assignment_expression" => {
1587                js_classify_assignment(&child, bytes, file_path, line, module_exports_alias, out);
1588            }
1589            "call_expression" if is_test_file => {
1590                js_classify_test_call(&child, bytes, file_path, line, out);
1591            }
1592            _ => {}
1593        }
1594    }
1595}
1596
1597/// Classify an `assignment_expression` for CommonJS export patterns.
1598///
1599/// Emits:
1600/// - `module.exports = …` → LibraryExport named `"module.exports"`
1601/// - `exports.NAME = …` → LibraryExport named `NAME`
1602/// - `exports = module.exports = VALUE` → recurses, seeding VALUE name
1603/// - `ALIAS.METHOD = function...` → LibraryExport named `METHOD` when
1604///   `ALIAS` is the known module.exports alias (B-0010)
1605fn js_classify_assignment(
1606    node: &tree_sitter::Node<'_>,
1607    bytes: &[u8],
1608    file_path: &Path,
1609    line: u32,
1610    module_exports_alias: Option<&str>,
1611    out: &mut Vec<EntryPoint>,
1612) {
1613    let Some(left) = node.child_by_field_name("left") else {
1614        return;
1615    };
1616
1617    // Handle chained assignment: `exports = module.exports = VALUE`
1618    // The left side is a bare `exports` identifier, and the RHS is
1619    // another assignment_expression (or a direct value).
1620    if left.kind() == "identifier" {
1621        let Ok(left_name) = std::str::from_utf8(&bytes[left.start_byte()..left.end_byte()]) else {
1622            return;
1623        };
1624        if left_name == "exports"
1625            && let Some(right) = node.child_by_field_name("right")
1626        {
1627            if right.kind() == "assignment_expression" {
1628                // Recurse: `exports = module.exports = VALUE`
1629                js_classify_assignment(&right, bytes, file_path, line, module_exports_alias, out);
1630            } else {
1631                // `exports = VALUE` — emit VALUE if it is a named fn.
1632                js_emit_identifier_as_export(&right, bytes, file_path, line, out);
1633            }
1634        }
1635        return;
1636    }
1637
1638    if left.kind() != "member_expression" {
1639        return;
1640    }
1641    let Some(obj_node) = left.child_by_field_name("object") else {
1642        return;
1643    };
1644    let Some(prop_node) = left.child_by_field_name("property") else {
1645        return;
1646    };
1647    let Ok(obj) = std::str::from_utf8(&bytes[obj_node.start_byte()..obj_node.end_byte()]) else {
1648        return;
1649    };
1650    let Ok(prop) = std::str::from_utf8(&bytes[prop_node.start_byte()..prop_node.end_byte()]) else {
1651        return;
1652    };
1653
1654    if obj == "module" && prop == "exports" {
1655        // `module.exports = ...` — the whole module is exported.
1656        out.push(EntryPoint {
1657            name: "module.exports".to_string(),
1658            kind: EntryPointKind::LibraryExport,
1659            file_path: file_path.to_path_buf(),
1660            line,
1661        });
1662    } else if obj == "exports" {
1663        // `exports.NAME = ...` — a named CommonJS export.
1664        out.push(EntryPoint {
1665            name: prop.to_string(),
1666            kind: EntryPointKind::LibraryExport,
1667            file_path: file_path.to_path_buf(),
1668            line,
1669        });
1670    } else if module_exports_alias.is_some_and(|alias| alias == obj) {
1671        // `ALIAS.METHOD = function...` — CommonJS object-accumulation.
1672        // Only emit when the RHS is a function-like value.
1673        let is_fn = node.child_by_field_name("right").is_some_and(|v| {
1674            matches!(
1675                v.kind(),
1676                "function_expression" | "arrow_function" | "function_declaration"
1677            )
1678        });
1679        if is_fn {
1680            out.push(EntryPoint {
1681                name: prop.to_string(),
1682                kind: EntryPointKind::LibraryExport,
1683                file_path: file_path.to_path_buf(),
1684                line,
1685            });
1686        }
1687    }
1688}
1689
1690/// Emit a LibraryExport entry when `node` is an identifier naming a
1691/// function or factory (used for `exports = VALUE` assignments).
1692fn js_emit_identifier_as_export(
1693    node: &tree_sitter::Node<'_>,
1694    bytes: &[u8],
1695    file_path: &Path,
1696    line: u32,
1697    out: &mut Vec<EntryPoint>,
1698) {
1699    if node.kind() == "identifier"
1700        && let Ok(name) = std::str::from_utf8(&bytes[node.start_byte()..node.end_byte()])
1701        && !name.is_empty()
1702    {
1703        out.push(EntryPoint {
1704            name: name.to_string(),
1705            kind: EntryPointKind::LibraryExport,
1706            file_path: file_path.to_path_buf(),
1707            line,
1708        });
1709    }
1710}
1711
1712/// Classify a `call_expression` for Jest/Vitest test runner patterns.
1713///
1714/// Emits Test entries for `test(…)`, `it(…)`, and `describe(…)` calls.
1715/// The first string argument (if present) becomes the entry name; falls
1716/// back to the call function name when the first argument is not a string.
1717fn js_classify_test_call(
1718    node: &tree_sitter::Node<'_>,
1719    bytes: &[u8],
1720    file_path: &Path,
1721    line: u32,
1722    out: &mut Vec<EntryPoint>,
1723) {
1724    let Some(func_node) = node.child_by_field_name("function") else {
1725        return;
1726    };
1727    let Ok(func_name) = std::str::from_utf8(&bytes[func_node.start_byte()..func_node.end_byte()])
1728    else {
1729        return;
1730    };
1731    if !matches!(func_name, "test" | "it" | "describe") {
1732        return;
1733    }
1734
1735    // Try to extract the first string argument as the test name.
1736    let entry_name = node
1737        .child_by_field_name("arguments")
1738        .and_then(|args| {
1739            let mut c = args.walk();
1740            args.children(&mut c).find(|ch| ch.kind() == "string")
1741        })
1742        .and_then(|s| {
1743            // String node: look for a string_fragment child.
1744            let mut c = s.walk();
1745            s.children(&mut c).find(|ch| ch.kind() == "string_fragment")
1746        })
1747        .and_then(|frag| {
1748            std::str::from_utf8(&bytes[frag.start_byte()..frag.end_byte()])
1749                .ok()
1750                .map(ToString::to_string)
1751        })
1752        .unwrap_or_else(|| func_name.to_string());
1753
1754    out.push(EntryPoint {
1755        name: entry_name,
1756        kind: EntryPointKind::Test,
1757        file_path: file_path.to_path_buf(),
1758        line,
1759    });
1760}
1761
1762// ---------------------------------------------------------------------------
1763// Java detector (B-0009, Cycle 9 / 4.1.5)
1764// ---------------------------------------------------------------------------
1765
1766/// Java entry-point detector (B-0009, Cycle 9).
1767///
1768/// Detects the entry-point shapes that anchor JVM-ecosystem call graphs.
1769/// Before this detector, `find_dead_code` returned `dead_fraction = 1.0`
1770/// on every Java codebase — there were no seeds, so BFS reachability
1771/// reached nothing.
1772///
1773/// Patterns recognised:
1774/// - `public static void main(String[] args)` → [`EntryPointKind::Main`]
1775/// - JUnit method annotations `@Test`, `@ParameterizedTest`,
1776///   `@RepeatedTest`, `@TestFactory` → [`EntryPointKind::Test`]
1777/// - Spring DI / stereotype class annotations `@Component`, `@Service`,
1778///   `@Repository`, `@Controller`, `@RestController`, `@Configuration`,
1779///   `@AutoConfiguration` → [`EntryPointKind::LibraryExport`]
1780/// - Spring `@Bean`-annotated methods → [`EntryPointKind::LibraryExport`]
1781///   (each bean is a library export from the DI container's perspective)
1782/// - `@SpringBootApplication`, `@SpringBootTest` annotated classes →
1783///   [`EntryPointKind::FrameworkDispatched`] (Spring container drives
1784///   their lifecycle; static call graph cannot see the invocation)
1785///
1786/// Annotation matching is by the trailing identifier — both
1787/// `@Component` and `@org.springframework.stereotype.Component` are
1788/// recognised. The detector treats `marker_annotation` (`@Foo`) and
1789/// `annotation` (`@Foo(args)`) uniformly.
1790#[derive(Debug, Default, Clone, Copy)]
1791pub struct JavaEntryDetector;
1792
1793impl EntryPointDetector for JavaEntryDetector {
1794    fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
1795        let mut entries = Vec::new();
1796        let Some(tree) = parse_with(source, &tree_sitter_java::LANGUAGE.into()) else {
1797            return entries;
1798        };
1799        let root = tree.root_node();
1800        let bytes = source.as_bytes();
1801        visit_java_node(&root, bytes, file_path, &mut entries);
1802        entries
1803    }
1804}
1805
1806/// Recursively walk the Java AST emitting entry points for matching
1807/// class and method declarations.
1808fn visit_java_node(node: &Node<'_>, bytes: &[u8], file_path: &Path, out: &mut Vec<EntryPoint>) {
1809    match node.kind() {
1810        "class_declaration" | "interface_declaration" | "enum_declaration" => {
1811            java_classify_class(node, bytes, file_path, out);
1812        }
1813        "method_declaration" => {
1814            java_classify_method(node, bytes, file_path, out);
1815        }
1816        _ => {}
1817    }
1818    let mut cursor = node.walk();
1819    for child in node.children(&mut cursor) {
1820        visit_java_node(&child, bytes, file_path, out);
1821    }
1822}
1823
/// Type-level annotations (matched by trailing identifier) that make
/// `java_classify_class` emit an [`EntryPointKind::FrameworkDispatched`]
/// entry: the Spring container, not in-source code, drives the lifecycle
/// of such types.
const JAVA_FRAMEWORK_CLASS_ANNOTATIONS: &[&str] = &[
    "SpringBootApplication",
    "SpringBootTest",
    "EnableAutoConfiguration",
];

/// Type-level annotations (matched by trailing identifier) that make
/// `java_classify_class` emit an [`EntryPointKind::LibraryExport`] entry:
/// Spring DI bean / stereotype surfaces that the container instantiates
/// even when no in-source caller does.
const JAVA_STEREOTYPE_CLASS_ANNOTATIONS: &[&str] = &[
    // Stereotypes.
    "Component",
    "Service",
    "Repository",
    "Controller",
    "RestController",
    // Configuration surfaces.
    "Configuration",
    "AutoConfiguration",
    "ConfigurationProperties",
];

/// Method-level annotations (matched by trailing identifier) that make
/// `java_classify_method` emit an [`EntryPointKind::Test`] entry. The
/// table covers both JUnit test annotations and JUnit lifecycle
/// callbacks.
const JAVA_TEST_METHOD_ANNOTATIONS: &[&str] = &[
    // Test annotations.
    "Test",
    "ParameterizedTest",
    "RepeatedTest",
    "TestFactory",
    "TestTemplate",
    // Lifecycle callbacks.
    "BeforeEach",
    "AfterEach",
    "BeforeAll",
    "AfterAll",
];
1858
1859/// Classify a Java class/interface/enum declaration.
1860fn java_classify_class(node: &Node<'_>, bytes: &[u8], file_path: &Path, out: &mut Vec<EntryPoint>) {
1861    let Some(name_node) = node.child_by_field_name("name") else {
1862        return;
1863    };
1864    let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()]) else {
1865        return;
1866    };
1867    let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
1868    let annotations = java_collect_annotation_names(node, bytes);
1869
1870    if annotations
1871        .iter()
1872        .any(|a| JAVA_FRAMEWORK_CLASS_ANNOTATIONS.contains(&a.as_str()))
1873    {
1874        out.push(EntryPoint {
1875            name: name.to_string(),
1876            kind: EntryPointKind::FrameworkDispatched,
1877            file_path: file_path.to_path_buf(),
1878            line,
1879        });
1880    }
1881
1882    if annotations
1883        .iter()
1884        .any(|a| JAVA_STEREOTYPE_CLASS_ANNOTATIONS.contains(&a.as_str()))
1885    {
1886        out.push(EntryPoint {
1887            name: name.to_string(),
1888            kind: EntryPointKind::LibraryExport,
1889            file_path: file_path.to_path_buf(),
1890            line,
1891        });
1892    }
1893}
1894
1895/// Classify a Java method declaration.
1896fn java_classify_method(
1897    node: &Node<'_>,
1898    bytes: &[u8],
1899    file_path: &Path,
1900    out: &mut Vec<EntryPoint>,
1901) {
1902    let Some(name_node) = node.child_by_field_name("name") else {
1903        return;
1904    };
1905    let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()]) else {
1906        return;
1907    };
1908    let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
1909    let annotations = java_collect_annotation_names(node, bytes);
1910
1911    // `public static void main(String[] args)` — the JVM entry contract.
1912    // tree-sitter-java places `public`, `static`, etc. as direct children
1913    // of a `modifiers` node; check both modifier text and method shape.
1914    if name == "main" && java_method_is_public_static(node, bytes) {
1915        out.push(EntryPoint {
1916            name: name.to_string(),
1917            kind: EntryPointKind::Main,
1918            file_path: file_path.to_path_buf(),
1919            line,
1920        });
1921    }
1922
1923    if annotations
1924        .iter()
1925        .any(|a| JAVA_TEST_METHOD_ANNOTATIONS.contains(&a.as_str()))
1926    {
1927        out.push(EntryPoint {
1928            name: name.to_string(),
1929            kind: EntryPointKind::Test,
1930            file_path: file_path.to_path_buf(),
1931            line,
1932        });
1933    }
1934
1935    // @Bean methods are library-export surfaces — Spring publishes them
1936    // into the application context regardless of in-source callers.
1937    if annotations.iter().any(|a| a == "Bean") {
1938        out.push(EntryPoint {
1939            name: name.to_string(),
1940            kind: EntryPointKind::LibraryExport,
1941            file_path: file_path.to_path_buf(),
1942            line,
1943        });
1944    }
1945}
1946
1947/// Collect annotation identifiers from a declaration's `modifiers` child.
1948///
1949/// tree-sitter-java places annotations inside a `modifiers` node child
1950/// of the declaration. Each annotation is either:
1951/// - `marker_annotation` — bare `@Foo`, with a `name` field that is an
1952///   `identifier` or `scoped_identifier`.
1953/// - `annotation` — `@Foo(args)`, same `name` field shape.
1954///
1955/// We extract the trailing identifier from the annotation name so that
1956/// both `@Component` and `@org.springframework.stereotype.Component`
1957/// resolve to `"Component"`.
1958fn java_collect_annotation_names(node: &Node<'_>, bytes: &[u8]) -> Vec<String> {
1959    let mut names = Vec::new();
1960    let Some(modifiers) = java_find_modifiers_child(node) else {
1961        return names;
1962    };
1963    let mut cursor = modifiers.walk();
1964    for child in modifiers.children(&mut cursor) {
1965        match child.kind() {
1966            "marker_annotation" | "annotation" => {
1967                if let Some(name_node) = child.child_by_field_name("name")
1968                    && let Some(ident) = java_annotation_trailing_identifier(&name_node, bytes)
1969                {
1970                    names.push(ident);
1971                }
1972            }
1973            _ => {}
1974        }
1975    }
1976    names
1977}
1978
1979/// Find the `modifiers` child of a Java declaration node, if present.
1980fn java_find_modifiers_child<'tree>(node: &Node<'tree>) -> Option<Node<'tree>> {
1981    let mut cursor = node.walk();
1982    node.children(&mut cursor)
1983        .find(|&child| child.kind() == "modifiers")
1984}
1985
1986/// Extract the trailing identifier from a Java annotation name node.
1987///
1988/// Handles two shapes:
1989/// - `(identifier)` — bare `@Foo`; returns `"Foo"`.
1990/// - `(scoped_identifier scope: ... name: (identifier))` — fully qualified
1991///   `@a.b.Foo`; returns `"Foo"`. Falls back to walking children to find
1992///   the last `identifier` if the `name` field is not present.
1993fn java_annotation_trailing_identifier(name_node: &Node<'_>, bytes: &[u8]) -> Option<String> {
1994    match name_node.kind() {
1995        "identifier" => std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()])
1996            .ok()
1997            .map(ToString::to_string),
1998        "scoped_identifier" => {
1999            // Prefer the `name` field; fall back to the last `identifier`
2000            // child if the field is unavailable on this grammar version.
2001            if let Some(name) = name_node.child_by_field_name("name") {
2002                return std::str::from_utf8(&bytes[name.start_byte()..name.end_byte()])
2003                    .ok()
2004                    .map(ToString::to_string);
2005            }
2006            let mut last: Option<String> = None;
2007            let mut cursor = name_node.walk();
2008            for child in name_node.children(&mut cursor) {
2009                if child.kind() == "identifier"
2010                    && let Ok(text) =
2011                        std::str::from_utf8(&bytes[child.start_byte()..child.end_byte()])
2012                {
2013                    last = Some(text.to_string());
2014                }
2015            }
2016            last
2017        }
2018        _ => None,
2019    }
2020}
2021
2022/// Return true if a Java `method_declaration` has both `public` and
2023/// `static` modifiers — required for the JVM `main` entry contract.
2024fn java_method_is_public_static(node: &Node<'_>, bytes: &[u8]) -> bool {
2025    let Some(modifiers) = java_find_modifiers_child(node) else {
2026        return false;
2027    };
2028    let text =
2029        std::str::from_utf8(&bytes[modifiers.start_byte()..modifiers.end_byte()]).unwrap_or("");
2030    // tree-sitter-java emits `public` and `static` as unnamed children
2031    // (keyword tokens) inside `modifiers`. The whole-text contains-check
2032    // is a reliable proxy and avoids enumerating the grammar's keyword
2033    // node kinds, which differ across grammar versions.
2034    text.contains("public") && text.contains("static")
2035}
2036
2037// ---------------------------------------------------------------------------
2038// Kotlin detector (B-0009, Cycle 9 / 4.1.5)
2039// ---------------------------------------------------------------------------
2040
2041/// Kotlin entry-point detector (B-0009, Cycle 9).
2042///
2043/// Detects:
2044/// - Top-level `fun main(...)` → [`EntryPointKind::Main`]
2045/// - Classes annotated with `@Component` / `@Service` / `@Repository` /
2046///   `@Controller` / `@RestController` / `@Configuration` →
2047///   [`EntryPointKind::LibraryExport`]
2048/// - Classes annotated with `@SpringBootApplication` /
2049///   `@SpringBootTest` → [`EntryPointKind::FrameworkDispatched`]
2050/// - Functions annotated with `@Test`, `@ParameterizedTest`,
2051///   `@RepeatedTest` → [`EntryPointKind::Test`]
2052/// - Classes whose names end in `Test` or `Spec` (Spek / KotlinTest
2053///   convention) → [`EntryPointKind::Test`]
2054///
2055/// Uses `tree_sitter_kotlin_ng` — the same grammar wired into the
2056/// chunker via `crates/ripvec-core/src/languages.rs`. Annotations are
2057/// matched by trailing identifier (so `@Component` and
2058/// `@org.springframework.stereotype.Component` both resolve).
2059#[derive(Debug, Default, Clone, Copy)]
2060pub struct KotlinEntryDetector;
2061
2062impl EntryPointDetector for KotlinEntryDetector {
2063    fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
2064        let mut entries = Vec::new();
2065        let Some(tree) = parse_with(source, &tree_sitter_kotlin_ng::LANGUAGE.into()) else {
2066            return entries;
2067        };
2068        let root = tree.root_node();
2069        let bytes = source.as_bytes();
2070
2071        // Top-level functions: `fun main(...)` is the JVM entry contract.
2072        // Walk only the root's direct children to enforce top-level scope.
2073        let mut cursor = root.walk();
2074        for child in root.children(&mut cursor) {
2075            if child.kind() == "function_declaration" {
2076                kotlin_classify_top_level_function(&child, bytes, file_path, &mut entries);
2077            }
2078        }
2079
2080        // Class / object declarations + nested function declarations.
2081        // Use full-AST walk so annotated nested classes and methods are
2082        // also seeded.
2083        visit_kotlin_node(&root, bytes, file_path, &mut entries);
2084
2085        entries
2086    }
2087}
2088
/// Class/object annotations (matched by trailing identifier) that make
/// `kotlin_classify_class` emit an
/// [`EntryPointKind::FrameworkDispatched`] entry.
const KOTLIN_FRAMEWORK_CLASS_ANNOTATIONS: &[&str] = &[
    "SpringBootApplication",
    "SpringBootTest",
    "EnableAutoConfiguration",
];

/// Class/object annotations (matched by trailing identifier) that make
/// `kotlin_classify_class` emit an [`EntryPointKind::LibraryExport`]
/// entry — the DI / library-export surface.
const KOTLIN_STEREOTYPE_CLASS_ANNOTATIONS: &[&str] = &[
    // Stereotypes.
    "Component",
    "Service",
    "Repository",
    "Controller",
    "RestController",
    // Configuration surfaces.
    "Configuration",
    "AutoConfiguration",
    "ConfigurationProperties",
];

/// Function annotations (matched by trailing identifier) that make
/// `kotlin_classify_function_annotations` emit an
/// [`EntryPointKind::Test`] entry.
const KOTLIN_TEST_FUNCTION_ANNOTATIONS: &[&str] = &[
    "Test",
    "ParameterizedTest",
    "RepeatedTest",
    "TestFactory",
    "TestTemplate",
];
2116
2117/// Recursively walk the Kotlin AST emitting entry points for class /
2118/// object / function declarations.
2119fn visit_kotlin_node(node: &Node<'_>, bytes: &[u8], file_path: &Path, out: &mut Vec<EntryPoint>) {
2120    match node.kind() {
2121        "class_declaration" | "object_declaration" => {
2122            kotlin_classify_class(node, bytes, file_path, out);
2123        }
2124        "function_declaration" => {
2125            kotlin_classify_function_annotations(node, bytes, file_path, out);
2126        }
2127        _ => {}
2128    }
2129    let mut cursor = node.walk();
2130    for child in node.children(&mut cursor) {
2131        visit_kotlin_node(&child, bytes, file_path, out);
2132    }
2133}
2134
2135/// Classify a top-level Kotlin function declaration.
2136///
2137/// Emits [`EntryPointKind::Main`] for any top-level `fun main`, whether
2138/// declared with `()`, `(args: Array<String>)`, or the Kotlin 1.3+
2139/// suspended `suspend fun main(...)` shape.
2140fn kotlin_classify_top_level_function(
2141    node: &Node<'_>,
2142    bytes: &[u8],
2143    file_path: &Path,
2144    out: &mut Vec<EntryPoint>,
2145) {
2146    let Some(name_node) = node.child_by_field_name("name") else {
2147        return;
2148    };
2149    let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()]) else {
2150        return;
2151    };
2152    let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
2153    if name == "main" {
2154        out.push(EntryPoint {
2155            name: name.to_string(),
2156            kind: EntryPointKind::Main,
2157            file_path: file_path.to_path_buf(),
2158            line,
2159        });
2160    }
2161}
2162
2163/// Classify annotations on a Kotlin function (anywhere in the tree).
2164///
2165/// Emits [`EntryPointKind::Test`] for `@Test` family annotations.
2166fn kotlin_classify_function_annotations(
2167    node: &Node<'_>,
2168    bytes: &[u8],
2169    file_path: &Path,
2170    out: &mut Vec<EntryPoint>,
2171) {
2172    let Some(name_node) = node.child_by_field_name("name") else {
2173        return;
2174    };
2175    let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()]) else {
2176        return;
2177    };
2178    let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
2179    let annotations = kotlin_collect_annotation_names(node, bytes);
2180    if annotations
2181        .iter()
2182        .any(|a| KOTLIN_TEST_FUNCTION_ANNOTATIONS.contains(&a.as_str()))
2183    {
2184        out.push(EntryPoint {
2185            name: name.to_string(),
2186            kind: EntryPointKind::Test,
2187            file_path: file_path.to_path_buf(),
2188            line,
2189        });
2190    }
2191}
2192
2193/// Classify a Kotlin class / object declaration.
2194fn kotlin_classify_class(
2195    node: &Node<'_>,
2196    bytes: &[u8],
2197    file_path: &Path,
2198    out: &mut Vec<EntryPoint>,
2199) {
2200    let Some(name_node) = node.child_by_field_name("name") else {
2201        return;
2202    };
2203    let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()]) else {
2204        return;
2205    };
2206    let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
2207    let annotations = kotlin_collect_annotation_names(node, bytes);
2208
2209    if annotations
2210        .iter()
2211        .any(|a| KOTLIN_FRAMEWORK_CLASS_ANNOTATIONS.contains(&a.as_str()))
2212    {
2213        out.push(EntryPoint {
2214            name: name.to_string(),
2215            kind: EntryPointKind::FrameworkDispatched,
2216            file_path: file_path.to_path_buf(),
2217            line,
2218        });
2219    }
2220
2221    if annotations
2222        .iter()
2223        .any(|a| KOTLIN_STEREOTYPE_CLASS_ANNOTATIONS.contains(&a.as_str()))
2224    {
2225        out.push(EntryPoint {
2226            name: name.to_string(),
2227            kind: EntryPointKind::LibraryExport,
2228            file_path: file_path.to_path_buf(),
2229            line,
2230        });
2231    }
2232
2233    // Convention-based: classes named `*Test` or `*Spec` are test entries
2234    // even without an explicit annotation — covers Spek / KotlinTest /
2235    // many Spring projects' naming patterns.
2236    if (name.ends_with("Test") || name.ends_with("Spec")) && name.len() > 4 {
2237        out.push(EntryPoint {
2238            name: name.to_string(),
2239            kind: EntryPointKind::Test,
2240            file_path: file_path.to_path_buf(),
2241            line,
2242        });
2243    }
2244}
2245
2246/// Collect annotation identifiers preceding a Kotlin declaration.
2247///
2248/// In `tree-sitter-kotlin-ng` annotations appear inside a `modifiers`
2249/// (or `modifier_list`) child of the declaration. The annotation node
2250/// kinds vary by grammar version — we accept `annotation` and
2251/// `single_annotation` and pull the trailing identifier from whichever
2252/// child contains the annotation's user_type / identifier.
2253fn kotlin_collect_annotation_names(node: &Node<'_>, bytes: &[u8]) -> Vec<String> {
2254    let mut names = Vec::new();
2255    let mut cursor = node.walk();
2256    for child in node.children(&mut cursor) {
2257        match child.kind() {
2258            "modifiers" | "modifier_list" => {
2259                kotlin_collect_annotations_in(&child, bytes, &mut names);
2260            }
2261            // Some grammar variants attach annotations directly as
2262            // siblings of the declaration body rather than inside a
2263            // `modifiers` node — handle that case too.
2264            "annotation" | "single_annotation" => {
2265                if let Some(ident) = kotlin_annotation_identifier(&child, bytes) {
2266                    names.push(ident);
2267                }
2268            }
2269            _ => {}
2270        }
2271    }
2272    names
2273}
2274
2275/// Walk into a Kotlin `modifiers` node and collect annotation names.
2276fn kotlin_collect_annotations_in(node: &Node<'_>, bytes: &[u8], out: &mut Vec<String>) {
2277    let mut cursor = node.walk();
2278    for child in node.children(&mut cursor) {
2279        match child.kind() {
2280            "annotation" | "single_annotation" => {
2281                if let Some(ident) = kotlin_annotation_identifier(&child, bytes) {
2282                    out.push(ident);
2283                }
2284            }
2285            _ => {}
2286        }
2287    }
2288}
2289
2290/// Extract the trailing identifier from a Kotlin annotation node.
2291///
2292/// Kotlin annotations parse as `@<user_type>` where `user_type` is a
2293/// dot-separated chain of `simple_user_type` nodes each containing a
2294/// `(simple_identifier)` (or just `(identifier)` in `tree-sitter-kotlin-ng`).
2295/// We walk the annotation subtree and return the last identifier found —
2296/// matching `@Component` and `@org.springframework.stereotype.Component`
2297/// uniformly to `"Component"`.
2298fn kotlin_annotation_identifier(node: &Node<'_>, bytes: &[u8]) -> Option<String> {
2299    let mut last: Option<String> = None;
2300    let mut stack: Vec<Node<'_>> = vec![*node];
2301    while let Some(n) = stack.pop() {
2302        if matches!(n.kind(), "identifier" | "simple_identifier")
2303            && let Ok(text) = std::str::from_utf8(&bytes[n.start_byte()..n.end_byte()])
2304        {
2305            // Skip the `@` token itself if it happens to be tokenised as
2306            // an identifier (it isn't in practice, but be defensive).
2307            if !text.is_empty() && text != "@" {
2308                last = Some(text.to_string());
2309            }
2310        }
2311        let mut cursor = n.walk();
2312        for child in n.children(&mut cursor) {
2313            stack.push(child);
2314        }
2315    }
2316    last
2317}
2318
2319// ---------------------------------------------------------------------------
2320// Dispatch
2321// ---------------------------------------------------------------------------
2322
2323/// Return the entry-point detector for a language identifier.
2324///
2325/// `language` is the lowercased language name as used in
2326/// `crate::languages` (`"rust"`, `"python"`, `"go"`, `"c"`, `"javascript"`,
2327/// `"java"`, `"kotlin"`).
2328/// Returns `None` for any language not yet covered by this module.
2329///
2330/// File-extension dispatch (`"rs"`, `"py"`, `"pyi"`, `"go"`, `"c"`,
2331/// `"h"`, `"js"`, `"jsx"`, `"ts"`, `"tsx"`, `"java"`, `"kt"`, `"kts"`) is
2332/// also accepted for caller convenience — the BFS walk in X2 carries
2333/// extensions, not language names, through its per-file loop.
2334#[must_use]
2335pub fn detector_for(language: &str) -> Option<Box<dyn EntryPointDetector>> {
2336    match language {
2337        "rust" | "rs" => Some(Box::new(RustEntryDetector)),
2338        "python" | "py" | "pyi" => Some(Box::new(PythonEntryDetector)),
2339        "go" => Some(Box::new(GoEntryDetector)),
2340        "c" | "h" => Some(Box::new(CEntryDetector)),
2341        "javascript" | "js" | "jsx" | "typescript" | "ts" | "tsx" => {
2342            Some(Box::new(JsEntryDetector))
2343        }
2344        "java" => Some(Box::new(JavaEntryDetector)),
2345        "kotlin" | "kt" | "kts" => Some(Box::new(KotlinEntryDetector)),
2346        _ => None,
2347    }
2348}
2349
2350// ---------------------------------------------------------------------------
2351// Summary aggregation (4.1.1 Front A node A4)
2352// ---------------------------------------------------------------------------
2353
2354/// Render a per-kind count map as a sorted list of human-friendly summary
2355/// lines for the `find_dead_code` MCP tool's `entry_points_detected`
2356/// field.
2357///
2358/// Each line follows the shape `"<count> <label>"` — e.g. `"12
2359/// framework-dispatched (MCP tools)"`, `"3 library exports"`. The output
2360/// is sorted lexicographically so the surface order is deterministic
2361/// across calls.
2362///
2363/// Lives in `ripvec-core` so the MCP tool wrapper and any future CLI
2364/// consumer share a single labelling convention. Added in 4.1.1 (Wave 1
2365/// Front A node A4) alongside [`EntryPointKind::FrameworkDispatched`].
2366#[must_use]
2367pub fn summarize_entry_point_kinds<S: std::hash::BuildHasher>(
2368    counts: &std::collections::HashMap<EntryPointKind, usize, S>,
2369) -> Vec<String> {
2370    let mut summary: Vec<String> = counts
2371        .iter()
2372        .map(|(kind, count)| format!("{count} {label}", label = label_for_kind(*kind)))
2373        .collect();
2374    summary.sort();
2375    summary
2376}
2377
2378/// Return the human-friendly label used in the
2379/// [`summarize_entry_point_kinds`] output for a given variant.
2380///
2381/// Exposed `pub` so external consumers can format individual kinds
2382/// without rebuilding a count map.
2383#[must_use]
2384pub fn label_for_kind(kind: EntryPointKind) -> &'static str {
2385    match kind {
2386        EntryPointKind::Main => "main",
2387        EntryPointKind::LibraryExport => "library exports",
2388        EntryPointKind::Test => "tests",
2389        EntryPointKind::Ffi => "FFI",
2390        EntryPointKind::ProcMacro => "proc-macros",
2391        EntryPointKind::Init => "init functions",
2392        EntryPointKind::BuildScript => "build scripts",
2393        EntryPointKind::FrameworkDispatched => "framework-dispatched (MCP tools)",
2394    }
2395}
2396
2397// ---------------------------------------------------------------------------
2398// Internal helpers
2399// ---------------------------------------------------------------------------
2400
2401/// Parse `source` with the given tree-sitter `Language`. Returns `None`
2402/// if the parser cannot be configured or the parse fails.
2403fn parse_with(source: &str, language: &tree_sitter::Language) -> Option<tree_sitter::Tree> {
2404    let mut parser = Parser::new();
2405    parser.set_language(language).ok()?;
2406    parser.parse(source, None)
2407}
2408
2409// Unused-but-keep-for-X2 helpers. These ride alongside the detector
2410// implementations so X2 has a single import point for the BFS-time
2411// helpers.
2412//
// `query_match_lines` returns the 1-based start line of every capture in
// every match of a compiled tree-sitter query against `source` (a match
// with several captures yields several lines). X2 will use this to
2415// post-process the raw RepoGraph definitions when an entry-point
2416// predicate fires on something that is not itself a Definition (e.g.
2417// the Python `if __name__ == "__main__"` block isn't a Definition —
2418// it's a top-level statement that anchors any function it calls).
2419//
2420// We expose it as `pub(crate)` so X2 can consume without it widening
2421// the public surface.
2422
2423#[allow(dead_code)]
2424pub(crate) fn query_match_lines(
2425    source: &str,
2426    language: &tree_sitter::Language,
2427    query: &Query,
2428) -> Vec<u32> {
2429    let mut lines = Vec::new();
2430    let Some(tree) = parse_with(source, language) else {
2431        return lines;
2432    };
2433    let mut cursor = QueryCursor::new();
2434    let mut matches = cursor.matches(query, tree.root_node(), source.as_bytes());
2435    while let Some(m) = matches.next() {
2436        for cap in m.captures {
2437            let line = u32::try_from(cap.node.start_position().row + 1).unwrap_or(u32::MAX);
2438            lines.push(line);
2439        }
2440    }
2441    lines
2442}