Skip to main content

ripvec_core/
entry_points.rs

1//! Per-language entry-point detection for the `find_dead_code` MCP tool
2//! (4.1.0).
3//!
4//! The trait [`EntryPointDetector`] and its per-language implementors
5//! ([`RustEntryDetector`], [`PythonEntryDetector`], [`GoEntryDetector`])
6//! identify the syntactic shapes that act as roots of the call graph: the
7//! BFS reachability walk for dead-code detection seeds from the union of
8//! all [`EntryPoint`]s emitted across the indexed corpus.
9//!
10//! This module is X1 of the 4.1.0 series; the actual reachability walk and
11//! cluster discovery (`RepoGraph::compute_dead_code`) lands in X2. The MCP
12//! tool wrapper lands in X3. The remaining language detectors land in X4.
13//! See `docs/FIND_DEAD_CODE_DESIGN.md` Section 2 for the per-language
14//! entry-point survey and Section 3 for the algorithm that consumes this
15//! output.
16//!
17//! ## Type B (Wired-Stub) self-audit note
18//!
19//! Until X2 lands, every public item in this module is consumed only from
20//! the integration tests under `crates/ripvec-core/tests/entry_points.rs`.
21//! `scripts/check_wiring_gaps.sh` will report these as Type B findings.
22//! The findings are **explicitly deferred** to X2 — see the Section 9
23//! PLAN.md entry — not silently dangling. Do not annotate with
24//! `#[doc(hidden)]`: the doc-visibility surface is part of the X2 contract
25//! and is the intended public API of the dead-code module.
26
27use std::path::{Path, PathBuf};
28
29use streaming_iterator::StreamingIterator;
30use tree_sitter::{Node, Parser, Query, QueryCursor};
31
/// The reason a [`Definition`](crate::repo_map::Definition)-shaped item
/// counts as a root for the dead-code reachability walk.
///
/// The categories mirror Section 2 of `docs/FIND_DEAD_CODE_DESIGN.md`.
/// A kind describes one *detection*, not one definition: a single
/// `pub fn` may be reported once as [`EntryPointKind::Main`] and once as
/// [`EntryPointKind::LibraryExport`], and consumers (X2) handle each
/// report on its own.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EntryPointKind {
    /// Program start of a binary: Rust `fn main()`, Go `func main()`, or
    /// Python's `if __name__ == "__main__"` block.
    Main,

    /// Part of a library's public surface: `pub` items in Rust libraries,
    /// names listed in Python's `__all__`, capitalised names in Go
    /// libraries.
    LibraryExport,

    /// Invoked by a test harness: Rust `#[test]` / `#[bench]`, Python
    /// `def test_*` / `*_test.py`, Go `func TestX` / `BenchmarkX` /
    /// `ExampleX` / `FuzzX`.
    Test,

    /// Callable across a foreign-function boundary: Rust `#[no_mangle]` /
    /// `extern "C"`, Go cgo `//export`.
    Ffi,

    /// Invoked by the compiler as a procedural macro: Rust
    /// `#[proc_macro]`, `#[proc_macro_derive]`, `#[proc_macro_attribute]`.
    ProcMacro,

    /// Run automatically at package initialisation: Go `func init()`.
    Init,

    /// Run by the build tool: Cargo's `build.rs`.
    BuildScript,
}
70
71/// A single entry-point detection in one source file.
72///
73/// Per-detection, not per-definition — the same `pub fn` can produce
74/// multiple `EntryPoint` instances (one for each matching predicate).
75/// Downstream consumers should treat each detection as an independent
76/// reachability seed.
77#[derive(Debug, Clone, PartialEq, Eq)]
78pub struct EntryPoint {
79    /// The symbol name of the entry point. For Rust this is the function
80    /// item identifier; for Python it is the function or module-level
81    /// expression name; for Go it is the function declaration identifier.
82    pub name: String,
83
84    /// Why this item was treated as an entry point.
85    pub kind: EntryPointKind,
86
87    /// The source file the entry point was detected in.
88    pub file_path: PathBuf,
89
90    /// 1-based line number of the entry point declaration. Matches the
91    /// `start_line` field of [`crate::repo_map::Definition`].
92    pub line: u32,
93}
94
95/// Per-language entry-point detector.
96///
97/// Designed for consumption by `RepoGraph::compute_dead_code` in
98/// 4.1.0-X2. Until X2 lands, the only consumers are the integration tests
99/// under `crates/ripvec-core/tests/entry_points.rs` — see the
100/// module-level docstring for the Type B (Wired-Stub) self-audit note.
101///
102/// Implementations parse the source once per call. The parsing cost is
103/// trivial (tree-sitter is O(n) and the source is already in memory at
104/// detection time), and stateless parsers compose more cleanly than a
105/// shared parser cache across the three (and eventually eleven) language
106/// detectors. X2's `RepoGraph::compute_dead_code` already iterates
107/// per-file, so the per-file parse adds no additional walk cost.
108pub trait EntryPointDetector {
109    /// Return every entry point declared in this source file.
110    ///
111    /// `source` is the full UTF-8 contents of `file_path`. The path is
112    /// passed alongside `source` so detectors that consider filename
113    /// patterns (e.g. Python's `test_*.py` and `*_test.py`,
114    /// Rust's `build.rs`) can use both signals.
115    ///
116    /// If parsing fails, returns an empty vector — entry-point detection
117    /// is best-effort and should never abort the dead-code walk.
118    fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint>;
119}
120
121// ---------------------------------------------------------------------------
122// Rust detector
123// ---------------------------------------------------------------------------
124
125/// Rust entry-point detector.
126///
127/// Detects (per `docs/FIND_DEAD_CODE_DESIGN.md` Section 2):
128/// - `pub fn main()` and bare `fn main()` (Main)
129/// - `pub fn` items in `lib.rs` / `mod.rs` (LibraryExport)
130/// - Items annotated with `#[test]` or `#[bench]` (Test)
131/// - Items annotated with `#[no_mangle]` or marked `extern "C"` (Ffi)
132/// - Items annotated with `#[proc_macro]`, `#[proc_macro_derive]`, or
133///   `#[proc_macro_attribute]` (ProcMacro)
134/// - The entire `build.rs` file is treated as a single BuildScript entry
135///   point (the build script's `main` is the cargo-known entry).
136#[derive(Debug, Default, Clone, Copy)]
137pub struct RustEntryDetector;
138
139impl EntryPointDetector for RustEntryDetector {
140    fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
141        let mut entries = Vec::new();
142        let Some(tree) = parse_with(source, &tree_sitter_rust::LANGUAGE.into()) else {
143            return entries;
144        };
145        let root = tree.root_node();
146        let bytes = source.as_bytes();
147
148        // Treat the entire build.rs file as a single BuildScript entry.
149        // The crate's main may be named anything inside build.rs (cargo
150        // calls the file's main), so we emit one entry at line 1.
151        if file_path.file_name().and_then(|s| s.to_str()) == Some("build.rs") {
152            entries.push(EntryPoint {
153                name: "build.rs".to_string(),
154                kind: EntryPointKind::BuildScript,
155                file_path: file_path.to_path_buf(),
156                line: 1,
157            });
158        }
159
160        let is_lib_or_mod_rs = matches!(
161            file_path.file_name().and_then(|s| s.to_str()),
162            Some("lib.rs" | "mod.rs")
163        );
164
165        // Walk every function_item declaration recursively. For each item:
166        //   - inspect its preceding attribute_item siblings for #[test],
167        //     #[bench], #[no_mangle], #[proc_macro*]
168        //   - inspect the function_item's own modifiers for `extern "C"`
169        //   - inspect the name for `main`
170        //   - if file is lib.rs/mod.rs and the item is `pub`, emit
171        //     LibraryExport
172        visit_rust_node(&root, bytes, file_path, is_lib_or_mod_rs, &mut entries);
173        entries
174    }
175}
176
177fn visit_rust_node(
178    node: &Node<'_>,
179    bytes: &[u8],
180    file_path: &Path,
181    is_lib_or_mod_rs: bool,
182    out: &mut Vec<EntryPoint>,
183) {
184    if node.kind() == "function_item" {
185        rust_classify_function(node, bytes, file_path, is_lib_or_mod_rs, out);
186    }
187    let mut cursor = node.walk();
188    for child in node.children(&mut cursor) {
189        visit_rust_node(&child, bytes, file_path, is_lib_or_mod_rs, out);
190    }
191}
192
193fn rust_classify_function(
194    node: &Node<'_>,
195    bytes: &[u8],
196    file_path: &Path,
197    is_lib_or_mod_rs: bool,
198    out: &mut Vec<EntryPoint>,
199) {
200    // Find the function name. function_item has a `name` field whose
201    // value is an identifier child.
202    let name_node = node.child_by_field_name("name");
203    let Some(name_node) = name_node else { return };
204    let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()]) else {
205        return;
206    };
207    let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
208
209    // Gather attributes that immediately precede this function. In
210    // tree-sitter-rust the attributes are SIBLING attribute_item nodes,
211    // not children of the function_item, so we walk previous siblings.
212    let attrs = collect_preceding_rust_attrs(node, bytes);
213
214    // A single function item may match multiple predicates (e.g. a
215    // `#[no_mangle] pub extern "C" fn main` in `lib.rs` is both Ffi
216    // and Main and LibraryExport). Emit one EntryPoint per matching
217    // predicate; the BFS in X2 treats each detection as a distinct
218    // reachability seed.
219
220    // #[proc_macro], #[proc_macro_derive], #[proc_macro_attribute].
221    if attrs.iter().any(|a| {
222        a.starts_with("proc_macro_derive")
223            || a.starts_with("proc_macro_attribute")
224            || a == "proc_macro"
225            || a.starts_with("proc_macro(")
226    }) {
227        out.push(EntryPoint {
228            name: name.to_string(),
229            kind: EntryPointKind::ProcMacro,
230            file_path: file_path.to_path_buf(),
231            line,
232        });
233    }
234
235    // #[test] / #[bench].
236    if attrs.iter().any(|a| a == "test" || a == "bench") {
237        out.push(EntryPoint {
238            name: name.to_string(),
239            kind: EntryPointKind::Test,
240            file_path: file_path.to_path_buf(),
241            line,
242        });
243    }
244
245    // FFI: #[no_mangle] OR `extern "C"` in the function declaration.
246    let function_text =
247        std::str::from_utf8(&bytes[node.start_byte()..node.end_byte()]).unwrap_or("");
248    let has_extern_c =
249        rust_function_has_extern_c(node, bytes) || function_text.contains("extern \"C\"");
250    if attrs.iter().any(|a| a == "no_mangle") || has_extern_c {
251        out.push(EntryPoint {
252            name: name.to_string(),
253            kind: EntryPointKind::Ffi,
254            file_path: file_path.to_path_buf(),
255            line,
256        });
257    }
258
259    // Main: `fn main` (with or without `pub`).
260    if name == "main" {
261        out.push(EntryPoint {
262            name: name.to_string(),
263            kind: EntryPointKind::Main,
264            file_path: file_path.to_path_buf(),
265            line,
266        });
267    }
268
269    // LibraryExport: `pub fn` in lib.rs / mod.rs.
270    if is_lib_or_mod_rs && rust_function_is_pub(node, bytes) {
271        out.push(EntryPoint {
272            name: name.to_string(),
273            kind: EntryPointKind::LibraryExport,
274            file_path: file_path.to_path_buf(),
275            line,
276        });
277    }
278}
279
280/// Collect the text of every `#[...]` attribute node that immediately
281/// precedes this function_item in source order. The returned strings are
282/// the attribute path/identifier (e.g. `"test"`, `"no_mangle"`,
283/// `"proc_macro_derive(Foo)"`), with the leading `#[` and trailing `]`
284/// stripped, and any leading `outer_attribute_item` `#[` punctuation
285/// removed.
286fn collect_preceding_rust_attrs(node: &Node<'_>, bytes: &[u8]) -> Vec<String> {
287    let mut attrs = Vec::new();
288    let mut prev = node.prev_sibling();
289    while let Some(p) = prev {
290        if p.kind() == "attribute_item" || p.kind() == "inner_attribute_item" {
291            // The attribute_item child structure is `# [ attribute ]`;
292            // pull the `attribute` child and use its text.
293            let mut cursor = p.walk();
294            let mut attr_text: Option<String> = None;
295            for child in p.children(&mut cursor) {
296                if child.kind() == "attribute"
297                    && let Ok(text) =
298                        std::str::from_utf8(&bytes[child.start_byte()..child.end_byte()])
299                {
300                    attr_text = Some(text.to_string());
301                }
302            }
303            if let Some(t) = attr_text {
304                attrs.push(t);
305            }
306            prev = p.prev_sibling();
307        } else if p.kind().starts_with("line_comment") || p.kind().starts_with("block_comment") {
308            prev = p.prev_sibling();
309        } else {
310            break;
311        }
312    }
313    attrs
314}
315
316/// Return true if the function_item node has a `pub` visibility modifier.
317fn rust_function_is_pub(node: &Node<'_>, bytes: &[u8]) -> bool {
318    let mut cursor = node.walk();
319    for child in node.children(&mut cursor) {
320        if child.kind() == "visibility_modifier"
321            && let Ok(text) = std::str::from_utf8(&bytes[child.start_byte()..child.end_byte()])
322        {
323            return text.starts_with("pub");
324        }
325    }
326    false
327}
328
329/// Return true if the function_item has an `extern "C"` ABI declaration
330/// as a function-modifier child (e.g. `pub extern "C" fn bar()`).
331fn rust_function_has_extern_c(node: &Node<'_>, bytes: &[u8]) -> bool {
332    let mut cursor = node.walk();
333    for child in node.children(&mut cursor) {
334        // tree-sitter-rust uses `function_modifiers` containing
335        // `extern_modifier`; the latter's child is a `string_literal`
336        // with the ABI name.
337        if child.kind() != "function_modifiers" {
338            continue;
339        }
340        let mut inner = child.walk();
341        for grandchild in child.children(&mut inner) {
342            if grandchild.kind() == "extern_modifier"
343                && let Ok(text) =
344                    std::str::from_utf8(&bytes[grandchild.start_byte()..grandchild.end_byte()])
345                && text.contains("\"C\"")
346            {
347                return true;
348            }
349        }
350    }
351    false
352}
353
354// ---------------------------------------------------------------------------
355// Python detector
356// ---------------------------------------------------------------------------
357
358/// Python entry-point detector.
359///
360/// Detects (per `docs/FIND_DEAD_CODE_DESIGN.md` Section 2):
361/// - `if __name__ == "__main__":` blocks at module top level (Main)
362/// - Top-level functions named in `__all__` (LibraryExport)
363/// - Functions starting with `test_` in files matching `test_*.py` /
364///   `*_test.py` or under a `tests/` directory (Test)
365///
366/// Framework decorators (`@click.command`, `@app.route`,
367/// `@pytest.fixture`) are not yet captured — see X4 for the framework
368/// pass.
369#[derive(Debug, Default, Clone, Copy)]
370pub struct PythonEntryDetector;
371
372impl EntryPointDetector for PythonEntryDetector {
373    fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
374        let mut entries = Vec::new();
375        let Some(tree) = parse_with(source, &tree_sitter_python::LANGUAGE.into()) else {
376            return entries;
377        };
378        let root = tree.root_node();
379        let bytes = source.as_bytes();
380
381        let is_test_file = python_is_test_file(file_path);
382
383        // Module top-level statements.
384        let mut cursor = root.walk();
385        for child in root.children(&mut cursor) {
386            match child.kind() {
387                "if_statement" if python_is_dunder_main_block(&child, bytes) => {
388                    let line = u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
389                    entries.push(EntryPoint {
390                        name: "__main__".to_string(),
391                        kind: EntryPointKind::Main,
392                        file_path: file_path.to_path_buf(),
393                        line,
394                    });
395                }
396                "expression_statement" => {
397                    // `__all__ = [...]` is an expression_statement
398                    // containing an assignment.
399                    if let Some(names) = python_extract_dunder_all(&child, bytes) {
400                        let line =
401                            u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
402                        for n in names {
403                            entries.push(EntryPoint {
404                                name: n,
405                                kind: EntryPointKind::LibraryExport,
406                                file_path: file_path.to_path_buf(),
407                                line,
408                            });
409                        }
410                    }
411                }
412                "function_definition" | "decorated_definition" => {
413                    let fn_node = if child.kind() == "decorated_definition" {
414                        child.child_by_field_name("definition")
415                    } else {
416                        Some(child)
417                    };
418                    if let Some(fn_node) = fn_node
419                        && fn_node.kind() == "function_definition"
420                        && let Some(name_node) = fn_node.child_by_field_name("name")
421                        && let Ok(name) = std::str::from_utf8(
422                            &bytes[name_node.start_byte()..name_node.end_byte()],
423                        )
424                        && is_test_file
425                        && name.starts_with("test_")
426                    {
427                        let line =
428                            u32::try_from(fn_node.start_position().row + 1).unwrap_or(u32::MAX);
429                        entries.push(EntryPoint {
430                            name: name.to_string(),
431                            kind: EntryPointKind::Test,
432                            file_path: file_path.to_path_buf(),
433                            line,
434                        });
435                    }
436                }
437                _ => {}
438            }
439        }
440
441        entries
442    }
443}
444
/// Decide whether `file_path` names a Python test file.
///
/// The file must have a `py` extension (compared ASCII
/// case-insensitively). It then qualifies if its stem starts with
/// `test_` or ends with `_test`, or if any component of the path is
/// exactly `tests`.
fn python_is_test_file(file_path: &Path) -> bool {
    let leaf = match file_path.file_name().and_then(|name| name.to_str()) {
        Some(leaf) => Path::new(leaf),
        None => return false,
    };

    let has_py_extension = matches!(leaf.extension(), Some(ext) if ext.eq_ignore_ascii_case("py"));
    if !has_py_extension {
        return false;
    }

    let named_like_test = leaf
        .file_stem()
        .and_then(|stem| stem.to_str())
        .is_some_and(|stem| stem.starts_with("test_") || stem.ends_with("_test"));

    // Otherwise fall back to any `tests` component anywhere in the path.
    named_like_test
        || file_path
            .iter()
            .any(|part| part == std::ffi::OsStr::new("tests"))
}
467
468fn python_is_dunder_main_block(node: &Node<'_>, bytes: &[u8]) -> bool {
469    // if condition: comparison `__name__ == "__main__"`.
470    let cond = node.child_by_field_name("condition");
471    let Some(cond) = cond else { return false };
472    let Ok(text) = std::str::from_utf8(&bytes[cond.start_byte()..cond.end_byte()]) else {
473        return false;
474    };
475    // Tolerate single or double quotes around `__main__`.
476    let normalized = text.replace(' ', "");
477    normalized.contains("__name__==\"__main__\"")
478        || normalized.contains("__name__=='__main__'")
479        || normalized.contains("\"__main__\"==__name__")
480        || normalized.contains("'__main__'==__name__")
481}
482
483/// Extract the string literals from a top-level `__all__ = [...]`
484/// assignment. Returns `None` if the statement is not such an assignment.
485fn python_extract_dunder_all(node: &Node<'_>, bytes: &[u8]) -> Option<Vec<String>> {
486    // expression_statement -> assignment (left, right)
487    let mut cursor = node.walk();
488    for child in node.children(&mut cursor) {
489        if child.kind() == "assignment" {
490            let left = child.child_by_field_name("left")?;
491            let right = child.child_by_field_name("right")?;
492            let left_text = std::str::from_utf8(&bytes[left.start_byte()..left.end_byte()]).ok()?;
493            if left_text.trim() != "__all__" {
494                return None;
495            }
496            // right is typically a `list` or `tuple` node containing
497            // `string` children.
498            let mut names = Vec::new();
499            let mut inner = right.walk();
500            for grandchild in right.children(&mut inner) {
501                if grandchild.kind() != "string" {
502                    continue;
503                }
504                // Walk the string node to find string_content child.
505                let mut sc = grandchild.walk();
506                let mut content_text: Option<String> = None;
507                for sg in grandchild.children(&mut sc) {
508                    if sg.kind() == "string_content"
509                        && let Ok(t) = std::str::from_utf8(&bytes[sg.start_byte()..sg.end_byte()])
510                    {
511                        content_text = Some(t.to_string());
512                    }
513                }
514                if let Some(t) = content_text {
515                    names.push(t);
516                } else if let Ok(raw) =
517                    std::str::from_utf8(&bytes[grandchild.start_byte()..grandchild.end_byte()])
518                {
519                    // Fallback: strip the outer quotes from the raw
520                    // string text.
521                    let trimmed = raw.trim_matches(|c| c == '"' || c == '\'');
522                    names.push(trimmed.to_string());
523                }
524            }
525            return Some(names);
526        }
527    }
528    None
529}
530
531// ---------------------------------------------------------------------------
532// Go detector
533// ---------------------------------------------------------------------------
534
535/// Go entry-point detector.
536///
537/// Detects (per `docs/FIND_DEAD_CODE_DESIGN.md` Section 2):
538/// - `func main()` in `package main` (Main)
539/// - `func init()` (Init) — runs automatically at package load
540/// - Functions starting with `Test`, `Benchmark`, `Example`, `Fuzz` (Test)
541/// - Exported names (starting with uppercase) in library packages
542///   (LibraryExport)
543#[derive(Debug, Default, Clone, Copy)]
544pub struct GoEntryDetector;
545
546impl EntryPointDetector for GoEntryDetector {
547    fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
548        let mut entries = Vec::new();
549        let Some(tree) = parse_with(source, &tree_sitter_go::LANGUAGE.into()) else {
550            return entries;
551        };
552        let root = tree.root_node();
553        let bytes = source.as_bytes();
554
555        // Determine the package name. `package main` enables `main` as
556        // the binary entry; non-main packages are libraries whose
557        // exported names are library entries.
558        let package_name = go_package_name(&root, bytes).unwrap_or_default();
559        let is_main_package = package_name == "main";
560
561        // Walk top-level function_declaration / method_declaration nodes.
562        let mut cursor = root.walk();
563        for child in root.children(&mut cursor) {
564            match child.kind() {
565                "function_declaration" => {
566                    if let Some(name_node) = child.child_by_field_name("name")
567                        && let Ok(name) = std::str::from_utf8(
568                            &bytes[name_node.start_byte()..name_node.end_byte()],
569                        )
570                    {
571                        let line =
572                            u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
573                        go_classify(name, line, is_main_package, file_path, &mut entries);
574                    }
575                }
576                "method_declaration" => {
577                    // Methods participate in LibraryExport only — main / init
578                    // / Test* are exclusively free functions.
579                    if let Some(name_node) = child.child_by_field_name("name")
580                        && let Ok(name) = std::str::from_utf8(
581                            &bytes[name_node.start_byte()..name_node.end_byte()],
582                        )
583                        && !is_main_package
584                        && go_is_exported(name)
585                    {
586                        let line =
587                            u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
588                        entries.push(EntryPoint {
589                            name: name.to_string(),
590                            kind: EntryPointKind::LibraryExport,
591                            file_path: file_path.to_path_buf(),
592                            line,
593                        });
594                    }
595                }
596                _ => {}
597            }
598        }
599
600        entries
601    }
602}
603
604fn go_package_name(root: &Node<'_>, bytes: &[u8]) -> Option<String> {
605    let mut cursor = root.walk();
606    for child in root.children(&mut cursor) {
607        if child.kind() != "package_clause" {
608            continue;
609        }
610        let mut inner = child.walk();
611        for grandchild in child.children(&mut inner) {
612            if grandchild.kind() == "package_identifier"
613                && let Ok(text) =
614                    std::str::from_utf8(&bytes[grandchild.start_byte()..grandchild.end_byte()])
615            {
616                return Some(text.to_string());
617            }
618        }
619    }
620    None
621}
622
623fn go_classify(
624    name: &str,
625    line: u32,
626    is_main_package: bool,
627    file_path: &Path,
628    out: &mut Vec<EntryPoint>,
629) {
630    if name == "main" && is_main_package {
631        out.push(EntryPoint {
632            name: name.to_string(),
633            kind: EntryPointKind::Main,
634            file_path: file_path.to_path_buf(),
635            line,
636        });
637        return;
638    }
639    if name == "init" {
640        out.push(EntryPoint {
641            name: name.to_string(),
642            kind: EntryPointKind::Init,
643            file_path: file_path.to_path_buf(),
644            line,
645        });
646        return;
647    }
648    if name.starts_with("Test")
649        || name.starts_with("Benchmark")
650        || name.starts_with("Example")
651        || name.starts_with("Fuzz")
652    {
653        out.push(EntryPoint {
654            name: name.to_string(),
655            kind: EntryPointKind::Test,
656            file_path: file_path.to_path_buf(),
657            line,
658        });
659        return;
660    }
661    if !is_main_package && go_is_exported(name) {
662        out.push(EntryPoint {
663            name: name.to_string(),
664            kind: EntryPointKind::LibraryExport,
665            file_path: file_path.to_path_buf(),
666            line,
667        });
668    }
669}
670
/// Return true if `name` starts with an uppercase letter, which is Go's
/// syntactic rule for an exported (package-public) identifier.
///
/// The check is Unicode-aware, not ASCII-only, so an identifier such as
/// `Über` counts as exported. An empty name, or one starting with `_` or
/// a digit, is not exported.
fn go_is_exported(name: &str) -> bool {
    name.chars().next().is_some_and(char::is_uppercase)
}
676
677// ---------------------------------------------------------------------------
678// Dispatch
679// ---------------------------------------------------------------------------
680
681/// Return the entry-point detector for a language identifier.
682///
683/// `language` is the lowercased language name as used in
684/// `crate::languages` (`"rust"`, `"python"`, `"go"`). Returns `None` for
685/// any language not yet covered by this wave; X4 will extend coverage to
686/// JS/TS, Java, C/C++, Ruby, Scala, Kotlin, Swift, and Bash.
687///
688/// File-extension dispatch (`"rs"`, `"py"`, `"pyi"`, `"go"`) is also
689/// accepted for caller convenience — the BFS walk in X2 carries
690/// extensions, not language names, through its per-file loop.
691#[must_use]
692pub fn detector_for(language: &str) -> Option<Box<dyn EntryPointDetector>> {
693    match language {
694        "rust" | "rs" => Some(Box::new(RustEntryDetector)),
695        "python" | "py" | "pyi" => Some(Box::new(PythonEntryDetector)),
696        "go" => Some(Box::new(GoEntryDetector)),
697        _ => None,
698    }
699}
700
701// ---------------------------------------------------------------------------
702// Internal helpers
703// ---------------------------------------------------------------------------
704
705/// Parse `source` with the given tree-sitter `Language`. Returns `None`
706/// if the parser cannot be configured or the parse fails.
707fn parse_with(source: &str, language: &tree_sitter::Language) -> Option<tree_sitter::Tree> {
708    let mut parser = Parser::new();
709    parser.set_language(language).ok()?;
710    parser.parse(source, None)
711}
712
713// Unused-but-keep-for-X2 helpers. These ride alongside the detector
714// implementations so X2 has a single import point for the BFS-time
715// helpers.
716//
717// `query_match_lines` returns the 1-based line of every match of a
718// compiled tree-sitter query against `source`. X2 will use this to
719// post-process the raw RepoGraph definitions when an entry-point
720// predicate fires on something that is not itself a Definition (e.g.
721// the Python `if __name__ == "__main__"` block isn't a Definition —
722// it's a top-level statement that anchors any function it calls).
723//
724// We expose it as `pub(crate)` so X2 can consume without it widening
725// the public surface.
726
727#[allow(dead_code)]
728pub(crate) fn query_match_lines(
729    source: &str,
730    language: &tree_sitter::Language,
731    query: &Query,
732) -> Vec<u32> {
733    let mut lines = Vec::new();
734    let Some(tree) = parse_with(source, language) else {
735        return lines;
736    };
737    let mut cursor = QueryCursor::new();
738    let mut matches = cursor.matches(query, tree.root_node(), source.as_bytes());
739    while let Some(m) = matches.next() {
740        for cap in m.captures {
741            let line = u32::try_from(cap.node.start_position().row + 1).unwrap_or(u32::MAX);
742            lines.push(line);
743        }
744    }
745    lines
746}