ripvec_core/entry_points.rs
1//! Per-language entry-point detection for the `find_dead_code` MCP tool
2//! (4.1.0).
3//!
4//! The trait [`EntryPointDetector`] and its per-language implementors
5//! ([`RustEntryDetector`], [`PythonEntryDetector`], [`GoEntryDetector`])
6//! identify the syntactic shapes that act as roots of the call graph: the
7//! BFS reachability walk for dead-code detection seeds from the union of
8//! all [`EntryPoint`]s emitted across the indexed corpus.
9//!
10//! This module is X1 of the 4.1.0 series; the actual reachability walk and
11//! cluster discovery (`RepoGraph::compute_dead_code`) lands in X2. The MCP
12//! tool wrapper lands in X3. The remaining language detectors land in X4.
13//! See `docs/FIND_DEAD_CODE_DESIGN.md` Section 2 for the per-language
14//! entry-point survey and Section 3 for the algorithm that consumes this
15//! output.
16//!
17//! ## Type B (Wired-Stub) self-audit note
18//!
19//! Until X2 lands, every public item in this module is consumed only from
20//! the integration tests under `crates/ripvec-core/tests/entry_points.rs`.
21//! `scripts/check_wiring_gaps.sh` will report these as Type B findings.
22//! The findings are **explicitly deferred** to X2 — see the Section 9
23//! PLAN.md entry — not silently dangling. Do not annotate with
24//! `#[doc(hidden)]`: the doc-visibility surface is part of the X2 contract
25//! and is the intended public API of the dead-code module.
26
27use std::path::{Path, PathBuf};
28
29use streaming_iterator::StreamingIterator;
30use tree_sitter::{Node, Parser, Query, QueryCursor};
31
/// Reason a [`Definition`](crate::repo_map::Definition)-shaped item is
/// seeded as a root of the dead-code reachability walk.
///
/// The categories mirror Section 2 of `docs/FIND_DEAD_CODE_DESIGN.md`.
/// A kind describes one *detection*, not one definition: a single
/// function may be reported several times under different kinds (for
/// example as both [`EntryPointKind::Main`] and
/// [`EntryPointKind::LibraryExport`]), and downstream consumers (X2)
/// handle every detection independently.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EntryPointKind {
    /// Program entry: `fn main()` in Rust, `func main()` in Go, or the
    /// `if __name__ == "__main__"` block in Python.
    Main,

    /// Public-API surface: `pub` items of Rust libraries, names listed
    /// in Python's `__all__`, capitalised names in Go libraries.
    LibraryExport,

    /// Test entry: `#[test]` / `#[bench]` in Rust, `def test_*` /
    /// `*_test.py` in Python, `func TestX` / `BenchmarkX` / `ExampleX` /
    /// `FuzzX` in Go.
    Test,

    /// Foreign-function-interface entry: `#[no_mangle]` / `extern "C"`
    /// in Rust, cgo `//export` in Go.
    Ffi,

    /// Procedural-macro entry in Rust: `#[proc_macro]`,
    /// `#[proc_macro_derive]`, `#[proc_macro_attribute]`.
    ProcMacro,

    /// Package-initialisation entry: `func init()` in Go.
    Init,

    /// Build-script entry: Cargo's `build.rs`.
    BuildScript,
}
70
71/// A single entry-point detection in one source file.
72///
73/// Per-detection, not per-definition — the same `pub fn` can produce
74/// multiple `EntryPoint` instances (one for each matching predicate).
75/// Downstream consumers should treat each detection as an independent
76/// reachability seed.
77#[derive(Debug, Clone, PartialEq, Eq)]
78pub struct EntryPoint {
79 /// The symbol name of the entry point. For Rust this is the function
80 /// item identifier; for Python it is the function or module-level
81 /// expression name; for Go it is the function declaration identifier.
82 pub name: String,
83
84 /// Why this item was treated as an entry point.
85 pub kind: EntryPointKind,
86
87 /// The source file the entry point was detected in.
88 pub file_path: PathBuf,
89
90 /// 1-based line number of the entry point declaration. Matches the
91 /// `start_line` field of [`crate::repo_map::Definition`].
92 pub line: u32,
93}
94
95/// Per-language entry-point detector.
96///
97/// Designed for consumption by `RepoGraph::compute_dead_code` in
98/// 4.1.0-X2. Until X2 lands, the only consumers are the integration tests
99/// under `crates/ripvec-core/tests/entry_points.rs` — see the
100/// module-level docstring for the Type B (Wired-Stub) self-audit note.
101///
102/// Implementations parse the source once per call. The parsing cost is
103/// trivial (tree-sitter is O(n) and the source is already in memory at
104/// detection time), and stateless parsers compose more cleanly than a
105/// shared parser cache across the three (and eventually eleven) language
106/// detectors. X2's `RepoGraph::compute_dead_code` already iterates
107/// per-file, so the per-file parse adds no additional walk cost.
108pub trait EntryPointDetector {
109 /// Return every entry point declared in this source file.
110 ///
111 /// `source` is the full UTF-8 contents of `file_path`. The path is
112 /// passed alongside `source` so detectors that consider filename
113 /// patterns (e.g. Python's `test_*.py` and `*_test.py`,
114 /// Rust's `build.rs`) can use both signals.
115 ///
116 /// If parsing fails, returns an empty vector — entry-point detection
117 /// is best-effort and should never abort the dead-code walk.
118 fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint>;
119}
120
121// ---------------------------------------------------------------------------
122// Rust detector
123// ---------------------------------------------------------------------------
124
/// Entry-point detector for Rust sources.
///
/// Following Section 2 of `docs/FIND_DEAD_CODE_DESIGN.md`, it reports:
/// - functions named `main`, with or without `pub` (Main)
/// - `pub fn` items located in `lib.rs` / `mod.rs` (LibraryExport)
/// - functions carrying `#[test]` or `#[bench]` (Test)
/// - functions carrying `#[no_mangle]` or declared `extern "C"` (Ffi)
/// - functions carrying `#[proc_macro]`, `#[proc_macro_derive]`, or
///   `#[proc_macro_attribute]` (ProcMacro)
/// - one BuildScript entry for any file named `build.rs`, standing in
///   for the script's cargo-invoked `main`
#[derive(Debug, Default, Clone, Copy)]
pub struct RustEntryDetector;
138
139impl EntryPointDetector for RustEntryDetector {
140 fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
141 let mut entries = Vec::new();
142 let Some(tree) = parse_with(source, &tree_sitter_rust::LANGUAGE.into()) else {
143 return entries;
144 };
145 let root = tree.root_node();
146 let bytes = source.as_bytes();
147
148 // Treat the entire build.rs file as a single BuildScript entry.
149 // The crate's main may be named anything inside build.rs (cargo
150 // calls the file's main), so we emit one entry at line 1.
151 if file_path.file_name().and_then(|s| s.to_str()) == Some("build.rs") {
152 entries.push(EntryPoint {
153 name: "build.rs".to_string(),
154 kind: EntryPointKind::BuildScript,
155 file_path: file_path.to_path_buf(),
156 line: 1,
157 });
158 }
159
160 let is_lib_or_mod_rs = matches!(
161 file_path.file_name().and_then(|s| s.to_str()),
162 Some("lib.rs" | "mod.rs")
163 );
164
165 // Walk every function_item declaration recursively. For each item:
166 // - inspect its preceding attribute_item siblings for #[test],
167 // #[bench], #[no_mangle], #[proc_macro*]
168 // - inspect the function_item's own modifiers for `extern "C"`
169 // - inspect the name for `main`
170 // - if file is lib.rs/mod.rs and the item is `pub`, emit
171 // LibraryExport
172 visit_rust_node(&root, bytes, file_path, is_lib_or_mod_rs, &mut entries);
173 entries
174 }
175}
176
177fn visit_rust_node(
178 node: &Node<'_>,
179 bytes: &[u8],
180 file_path: &Path,
181 is_lib_or_mod_rs: bool,
182 out: &mut Vec<EntryPoint>,
183) {
184 if node.kind() == "function_item" {
185 rust_classify_function(node, bytes, file_path, is_lib_or_mod_rs, out);
186 }
187 let mut cursor = node.walk();
188 for child in node.children(&mut cursor) {
189 visit_rust_node(&child, bytes, file_path, is_lib_or_mod_rs, out);
190 }
191}
192
193fn rust_classify_function(
194 node: &Node<'_>,
195 bytes: &[u8],
196 file_path: &Path,
197 is_lib_or_mod_rs: bool,
198 out: &mut Vec<EntryPoint>,
199) {
200 // Find the function name. function_item has a `name` field whose
201 // value is an identifier child.
202 let name_node = node.child_by_field_name("name");
203 let Some(name_node) = name_node else { return };
204 let Ok(name) = std::str::from_utf8(&bytes[name_node.start_byte()..name_node.end_byte()]) else {
205 return;
206 };
207 let line = u32::try_from(node.start_position().row + 1).unwrap_or(u32::MAX);
208
209 // Gather attributes that immediately precede this function. In
210 // tree-sitter-rust the attributes are SIBLING attribute_item nodes,
211 // not children of the function_item, so we walk previous siblings.
212 let attrs = collect_preceding_rust_attrs(node, bytes);
213
214 // A single function item may match multiple predicates (e.g. a
215 // `#[no_mangle] pub extern "C" fn main` in `lib.rs` is both Ffi
216 // and Main and LibraryExport). Emit one EntryPoint per matching
217 // predicate; the BFS in X2 treats each detection as a distinct
218 // reachability seed.
219
220 // #[proc_macro], #[proc_macro_derive], #[proc_macro_attribute].
221 if attrs.iter().any(|a| {
222 a.starts_with("proc_macro_derive")
223 || a.starts_with("proc_macro_attribute")
224 || a == "proc_macro"
225 || a.starts_with("proc_macro(")
226 }) {
227 out.push(EntryPoint {
228 name: name.to_string(),
229 kind: EntryPointKind::ProcMacro,
230 file_path: file_path.to_path_buf(),
231 line,
232 });
233 }
234
235 // #[test] / #[bench].
236 if attrs.iter().any(|a| a == "test" || a == "bench") {
237 out.push(EntryPoint {
238 name: name.to_string(),
239 kind: EntryPointKind::Test,
240 file_path: file_path.to_path_buf(),
241 line,
242 });
243 }
244
245 // FFI: #[no_mangle] OR `extern "C"` in the function declaration.
246 let function_text =
247 std::str::from_utf8(&bytes[node.start_byte()..node.end_byte()]).unwrap_or("");
248 let has_extern_c =
249 rust_function_has_extern_c(node, bytes) || function_text.contains("extern \"C\"");
250 if attrs.iter().any(|a| a == "no_mangle") || has_extern_c {
251 out.push(EntryPoint {
252 name: name.to_string(),
253 kind: EntryPointKind::Ffi,
254 file_path: file_path.to_path_buf(),
255 line,
256 });
257 }
258
259 // Main: `fn main` (with or without `pub`).
260 if name == "main" {
261 out.push(EntryPoint {
262 name: name.to_string(),
263 kind: EntryPointKind::Main,
264 file_path: file_path.to_path_buf(),
265 line,
266 });
267 }
268
269 // LibraryExport: `pub fn` in lib.rs / mod.rs.
270 if is_lib_or_mod_rs && rust_function_is_pub(node, bytes) {
271 out.push(EntryPoint {
272 name: name.to_string(),
273 kind: EntryPointKind::LibraryExport,
274 file_path: file_path.to_path_buf(),
275 line,
276 });
277 }
278}
279
280/// Collect the text of every `#[...]` attribute node that immediately
281/// precedes this function_item in source order. The returned strings are
282/// the attribute path/identifier (e.g. `"test"`, `"no_mangle"`,
283/// `"proc_macro_derive(Foo)"`), with the leading `#[` and trailing `]`
284/// stripped, and any leading `outer_attribute_item` `#[` punctuation
285/// removed.
286fn collect_preceding_rust_attrs(node: &Node<'_>, bytes: &[u8]) -> Vec<String> {
287 let mut attrs = Vec::new();
288 let mut prev = node.prev_sibling();
289 while let Some(p) = prev {
290 if p.kind() == "attribute_item" || p.kind() == "inner_attribute_item" {
291 // The attribute_item child structure is `# [ attribute ]`;
292 // pull the `attribute` child and use its text.
293 let mut cursor = p.walk();
294 let mut attr_text: Option<String> = None;
295 for child in p.children(&mut cursor) {
296 if child.kind() == "attribute"
297 && let Ok(text) =
298 std::str::from_utf8(&bytes[child.start_byte()..child.end_byte()])
299 {
300 attr_text = Some(text.to_string());
301 }
302 }
303 if let Some(t) = attr_text {
304 attrs.push(t);
305 }
306 prev = p.prev_sibling();
307 } else if p.kind().starts_with("line_comment") || p.kind().starts_with("block_comment") {
308 prev = p.prev_sibling();
309 } else {
310 break;
311 }
312 }
313 attrs
314}
315
316/// Return true if the function_item node has a `pub` visibility modifier.
317fn rust_function_is_pub(node: &Node<'_>, bytes: &[u8]) -> bool {
318 let mut cursor = node.walk();
319 for child in node.children(&mut cursor) {
320 if child.kind() == "visibility_modifier"
321 && let Ok(text) = std::str::from_utf8(&bytes[child.start_byte()..child.end_byte()])
322 {
323 return text.starts_with("pub");
324 }
325 }
326 false
327}
328
329/// Return true if the function_item has an `extern "C"` ABI declaration
330/// as a function-modifier child (e.g. `pub extern "C" fn bar()`).
331fn rust_function_has_extern_c(node: &Node<'_>, bytes: &[u8]) -> bool {
332 let mut cursor = node.walk();
333 for child in node.children(&mut cursor) {
334 // tree-sitter-rust uses `function_modifiers` containing
335 // `extern_modifier`; the latter's child is a `string_literal`
336 // with the ABI name.
337 if child.kind() != "function_modifiers" {
338 continue;
339 }
340 let mut inner = child.walk();
341 for grandchild in child.children(&mut inner) {
342 if grandchild.kind() == "extern_modifier"
343 && let Ok(text) =
344 std::str::from_utf8(&bytes[grandchild.start_byte()..grandchild.end_byte()])
345 && text.contains("\"C\"")
346 {
347 return true;
348 }
349 }
350 }
351 false
352}
353
354// ---------------------------------------------------------------------------
355// Python detector
356// ---------------------------------------------------------------------------
357
/// Entry-point detector for Python sources.
///
/// Following Section 2 of `docs/FIND_DEAD_CODE_DESIGN.md`, it reports:
/// - module-level `if __name__ == "__main__":` blocks (Main)
/// - every string listed in a module-level `__all__ = [...]`
///   assignment, anchored at the line of that assignment
///   (LibraryExport)
/// - module-level functions whose name starts with `test_`, when the
///   file matches `test_*.py` / `*_test.py` or sits under a `tests`
///   directory (Test)
///
/// Framework decorators (`@click.command`, `@app.route`,
/// `@pytest.fixture`) are not recognised yet; that is scheduled for the
/// X4 framework pass.
#[derive(Debug, Default, Clone, Copy)]
pub struct PythonEntryDetector;
371
372impl EntryPointDetector for PythonEntryDetector {
373 fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
374 let mut entries = Vec::new();
375 let Some(tree) = parse_with(source, &tree_sitter_python::LANGUAGE.into()) else {
376 return entries;
377 };
378 let root = tree.root_node();
379 let bytes = source.as_bytes();
380
381 let is_test_file = python_is_test_file(file_path);
382
383 // Module top-level statements.
384 let mut cursor = root.walk();
385 for child in root.children(&mut cursor) {
386 match child.kind() {
387 "if_statement" if python_is_dunder_main_block(&child, bytes) => {
388 let line = u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
389 entries.push(EntryPoint {
390 name: "__main__".to_string(),
391 kind: EntryPointKind::Main,
392 file_path: file_path.to_path_buf(),
393 line,
394 });
395 }
396 "expression_statement" => {
397 // `__all__ = [...]` is an expression_statement
398 // containing an assignment.
399 if let Some(names) = python_extract_dunder_all(&child, bytes) {
400 let line =
401 u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
402 for n in names {
403 entries.push(EntryPoint {
404 name: n,
405 kind: EntryPointKind::LibraryExport,
406 file_path: file_path.to_path_buf(),
407 line,
408 });
409 }
410 }
411 }
412 "function_definition" | "decorated_definition" => {
413 let fn_node = if child.kind() == "decorated_definition" {
414 child.child_by_field_name("definition")
415 } else {
416 Some(child)
417 };
418 if let Some(fn_node) = fn_node
419 && fn_node.kind() == "function_definition"
420 && let Some(name_node) = fn_node.child_by_field_name("name")
421 && let Ok(name) = std::str::from_utf8(
422 &bytes[name_node.start_byte()..name_node.end_byte()],
423 )
424 && is_test_file
425 && name.starts_with("test_")
426 {
427 let line =
428 u32::try_from(fn_node.start_position().row + 1).unwrap_or(u32::MAX);
429 entries.push(EntryPoint {
430 name: name.to_string(),
431 kind: EntryPointKind::Test,
432 file_path: file_path.to_path_buf(),
433 line,
434 });
435 }
436 }
437 _ => {}
438 }
439 }
440
441 entries
442 }
443}
444
/// Decide whether `file_path` names a Python test file.
///
/// The file must have a `py` extension (compared case-insensitively)
/// and additionally either have a stem that starts with `test_` or ends
/// with `_test`, or have a path component that is exactly `tests`.
/// Paths without a UTF-8 file name are never test files.
fn python_is_test_file(file_path: &Path) -> bool {
    let Some(file_name) = file_path.file_name().and_then(|name| name.to_str()) else {
        return false;
    };
    let name = Path::new(file_name);

    let has_py_extension = name
        .extension()
        .is_some_and(|ext| ext.eq_ignore_ascii_case("py"));
    let stem = name
        .file_stem()
        .and_then(|stem| stem.to_str())
        .unwrap_or("");
    let named_like_test = stem.starts_with("test_") || stem.ends_with("_test");
    // Evaluated lazily: only consulted when the name itself is not test-like.
    let under_tests_dir = || {
        file_path
            .components()
            .any(|part| part.as_os_str() == std::ffi::OsStr::new("tests"))
    };

    has_py_extension && (named_like_test || under_tests_dir())
}
467
468fn python_is_dunder_main_block(node: &Node<'_>, bytes: &[u8]) -> bool {
469 // if condition: comparison `__name__ == "__main__"`.
470 let cond = node.child_by_field_name("condition");
471 let Some(cond) = cond else { return false };
472 let Ok(text) = std::str::from_utf8(&bytes[cond.start_byte()..cond.end_byte()]) else {
473 return false;
474 };
475 // Tolerate single or double quotes around `__main__`.
476 let normalized = text.replace(' ', "");
477 normalized.contains("__name__==\"__main__\"")
478 || normalized.contains("__name__=='__main__'")
479 || normalized.contains("\"__main__\"==__name__")
480 || normalized.contains("'__main__'==__name__")
481}
482
483/// Extract the string literals from a top-level `__all__ = [...]`
484/// assignment. Returns `None` if the statement is not such an assignment.
485fn python_extract_dunder_all(node: &Node<'_>, bytes: &[u8]) -> Option<Vec<String>> {
486 // expression_statement -> assignment (left, right)
487 let mut cursor = node.walk();
488 for child in node.children(&mut cursor) {
489 if child.kind() == "assignment" {
490 let left = child.child_by_field_name("left")?;
491 let right = child.child_by_field_name("right")?;
492 let left_text = std::str::from_utf8(&bytes[left.start_byte()..left.end_byte()]).ok()?;
493 if left_text.trim() != "__all__" {
494 return None;
495 }
496 // right is typically a `list` or `tuple` node containing
497 // `string` children.
498 let mut names = Vec::new();
499 let mut inner = right.walk();
500 for grandchild in right.children(&mut inner) {
501 if grandchild.kind() != "string" {
502 continue;
503 }
504 // Walk the string node to find string_content child.
505 let mut sc = grandchild.walk();
506 let mut content_text: Option<String> = None;
507 for sg in grandchild.children(&mut sc) {
508 if sg.kind() == "string_content"
509 && let Ok(t) = std::str::from_utf8(&bytes[sg.start_byte()..sg.end_byte()])
510 {
511 content_text = Some(t.to_string());
512 }
513 }
514 if let Some(t) = content_text {
515 names.push(t);
516 } else if let Ok(raw) =
517 std::str::from_utf8(&bytes[grandchild.start_byte()..grandchild.end_byte()])
518 {
519 // Fallback: strip the outer quotes from the raw
520 // string text.
521 let trimmed = raw.trim_matches(|c| c == '"' || c == '\'');
522 names.push(trimmed.to_string());
523 }
524 }
525 return Some(names);
526 }
527 }
528 None
529}
530
531// ---------------------------------------------------------------------------
532// Go detector
533// ---------------------------------------------------------------------------
534
/// Entry-point detector for Go sources.
///
/// Following Section 2 of `docs/FIND_DEAD_CODE_DESIGN.md`, it reports
/// top-level declarations only:
/// - `func main()` inside `package main` (Main)
/// - `func init()`, which runs automatically at package load (Init)
/// - functions named with the `Test`, `Benchmark`, `Example`, or `Fuzz`
///   prefixes (Test)
/// - exported (uppercase-initial) functions and methods in packages
///   other than `main` (LibraryExport)
#[derive(Debug, Default, Clone, Copy)]
pub struct GoEntryDetector;
545
546impl EntryPointDetector for GoEntryDetector {
547 fn detect(&self, source: &str, file_path: &Path) -> Vec<EntryPoint> {
548 let mut entries = Vec::new();
549 let Some(tree) = parse_with(source, &tree_sitter_go::LANGUAGE.into()) else {
550 return entries;
551 };
552 let root = tree.root_node();
553 let bytes = source.as_bytes();
554
555 // Determine the package name. `package main` enables `main` as
556 // the binary entry; non-main packages are libraries whose
557 // exported names are library entries.
558 let package_name = go_package_name(&root, bytes).unwrap_or_default();
559 let is_main_package = package_name == "main";
560
561 // Walk top-level function_declaration / method_declaration nodes.
562 let mut cursor = root.walk();
563 for child in root.children(&mut cursor) {
564 match child.kind() {
565 "function_declaration" => {
566 if let Some(name_node) = child.child_by_field_name("name")
567 && let Ok(name) = std::str::from_utf8(
568 &bytes[name_node.start_byte()..name_node.end_byte()],
569 )
570 {
571 let line =
572 u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
573 go_classify(name, line, is_main_package, file_path, &mut entries);
574 }
575 }
576 "method_declaration" => {
577 // Methods participate in LibraryExport only — main / init
578 // / Test* are exclusively free functions.
579 if let Some(name_node) = child.child_by_field_name("name")
580 && let Ok(name) = std::str::from_utf8(
581 &bytes[name_node.start_byte()..name_node.end_byte()],
582 )
583 && !is_main_package
584 && go_is_exported(name)
585 {
586 let line =
587 u32::try_from(child.start_position().row + 1).unwrap_or(u32::MAX);
588 entries.push(EntryPoint {
589 name: name.to_string(),
590 kind: EntryPointKind::LibraryExport,
591 file_path: file_path.to_path_buf(),
592 line,
593 });
594 }
595 }
596 _ => {}
597 }
598 }
599
600 entries
601 }
602}
603
604fn go_package_name(root: &Node<'_>, bytes: &[u8]) -> Option<String> {
605 let mut cursor = root.walk();
606 for child in root.children(&mut cursor) {
607 if child.kind() != "package_clause" {
608 continue;
609 }
610 let mut inner = child.walk();
611 for grandchild in child.children(&mut inner) {
612 if grandchild.kind() == "package_identifier"
613 && let Ok(text) =
614 std::str::from_utf8(&bytes[grandchild.start_byte()..grandchild.end_byte()])
615 {
616 return Some(text.to_string());
617 }
618 }
619 }
620 None
621}
622
623fn go_classify(
624 name: &str,
625 line: u32,
626 is_main_package: bool,
627 file_path: &Path,
628 out: &mut Vec<EntryPoint>,
629) {
630 if name == "main" && is_main_package {
631 out.push(EntryPoint {
632 name: name.to_string(),
633 kind: EntryPointKind::Main,
634 file_path: file_path.to_path_buf(),
635 line,
636 });
637 return;
638 }
639 if name == "init" {
640 out.push(EntryPoint {
641 name: name.to_string(),
642 kind: EntryPointKind::Init,
643 file_path: file_path.to_path_buf(),
644 line,
645 });
646 return;
647 }
648 if name.starts_with("Test")
649 || name.starts_with("Benchmark")
650 || name.starts_with("Example")
651 || name.starts_with("Fuzz")
652 {
653 out.push(EntryPoint {
654 name: name.to_string(),
655 kind: EntryPointKind::Test,
656 file_path: file_path.to_path_buf(),
657 line,
658 });
659 return;
660 }
661 if !is_main_package && go_is_exported(name) {
662 out.push(EntryPoint {
663 name: name.to_string(),
664 kind: EntryPointKind::LibraryExport,
665 file_path: file_path.to_path_buf(),
666 line,
667 });
668 }
669}
670
/// Return true if `name` is an exported (package-public) Go identifier.
///
/// Go exports an identifier when its first character is a Unicode
/// uppercase letter, so non-ASCII initials such as `Ä` or `Σ` count as
/// well. An empty name, or one starting with `_`, a digit, a lowercase
/// letter or a caseless letter, is not exported.
fn go_is_exported(name: &str) -> bool {
    name.chars().next().is_some_and(char::is_uppercase)
}
676
677// ---------------------------------------------------------------------------
678// Dispatch
679// ---------------------------------------------------------------------------
680
681/// Return the entry-point detector for a language identifier.
682///
683/// `language` is the lowercased language name as used in
684/// `crate::languages` (`"rust"`, `"python"`, `"go"`). Returns `None` for
685/// any language not yet covered by this wave; X4 will extend coverage to
686/// JS/TS, Java, C/C++, Ruby, Scala, Kotlin, Swift, and Bash.
687///
688/// File-extension dispatch (`"rs"`, `"py"`, `"pyi"`, `"go"`) is also
689/// accepted for caller convenience — the BFS walk in X2 carries
690/// extensions, not language names, through its per-file loop.
691#[must_use]
692pub fn detector_for(language: &str) -> Option<Box<dyn EntryPointDetector>> {
693 match language {
694 "rust" | "rs" => Some(Box::new(RustEntryDetector)),
695 "python" | "py" | "pyi" => Some(Box::new(PythonEntryDetector)),
696 "go" => Some(Box::new(GoEntryDetector)),
697 _ => None,
698 }
699}
700
701// ---------------------------------------------------------------------------
702// Internal helpers
703// ---------------------------------------------------------------------------
704
705/// Parse `source` with the given tree-sitter `Language`. Returns `None`
706/// if the parser cannot be configured or the parse fails.
707fn parse_with(source: &str, language: &tree_sitter::Language) -> Option<tree_sitter::Tree> {
708 let mut parser = Parser::new();
709 parser.set_language(language).ok()?;
710 parser.parse(source, None)
711}
712
713// Unused-but-keep-for-X2 helpers. These ride alongside the detector
714// implementations so X2 has a single import point for the BFS-time
715// helpers.
716//
717// `query_match_lines` returns the 1-based line of every match of a
718// compiled tree-sitter query against `source`. X2 will use this to
719// post-process the raw RepoGraph definitions when an entry-point
720// predicate fires on something that is not itself a Definition (e.g.
721// the Python `if __name__ == "__main__"` block isn't a Definition —
722// it's a top-level statement that anchors any function it calls).
723//
724// We expose it as `pub(crate)` so X2 can consume without it widening
725// the public surface.
726
727#[allow(dead_code)]
728pub(crate) fn query_match_lines(
729 source: &str,
730 language: &tree_sitter::Language,
731 query: &Query,
732) -> Vec<u32> {
733 let mut lines = Vec::new();
734 let Some(tree) = parse_with(source, language) else {
735 return lines;
736 };
737 let mut cursor = QueryCursor::new();
738 let mut matches = cursor.matches(query, tree.root_node(), source.as_bytes());
739 while let Some(m) = matches.next() {
740 for cap in m.captures {
741 let line = u32::try_from(cap.node.start_position().row + 1).unwrap_or(u32::MAX);
742 lines.push(line);
743 }
744 }
745 lines
746}