1use std::cell::RefCell;
2use std::collections::HashMap;
3
4use tree_sitter::{Language, Parser, Tree};
5
6use crate::core::code_graph::CodeGraph;
7use crate::core::moniker::Moniker;
8
9use crate::lang::canonical_walker::CanonicalWalker;
10
11pub mod build;
12mod canonicalize;
13mod kinds;
14mod strategy;
15
16use canonicalize::{compute_module_moniker, read_package_name};
17use strategy::{Strategy, collect_callable_table, collect_type_table};
18
/// Caller-supplied configuration handed to the Java extractor.
///
/// The derived `Default` yields an empty `external_packages` list.
#[derive(Clone, Debug, Default)]
pub struct Presets {
    /// Package names supplied by the caller.
    // NOTE(review): presumably these are packages the strategy should treat as
    // external when classifying references — confirm against `strategy`.
    pub external_packages: Vec<String>,
}
23
24pub fn parse(source: &str) -> Tree {
25 let mut parser = Parser::new();
26 let language: Language = tree_sitter_java::LANGUAGE.into();
27 parser
28 .set_language(&language)
29 .expect("failed to load tree-sitter Java grammar");
30 parser
31 .parse(source, None)
32 .expect("tree-sitter parse returned None on a non-cancelled call")
33}
34
35pub fn extract(
36 uri: &str,
37 source: &str,
38 anchor: &Moniker,
39 deep: bool,
40 presets: &Presets,
41) -> CodeGraph {
42 let tree = parse(source);
43 let pkg = read_package_name(tree.root_node(), source.as_bytes());
44 let pieces: Vec<&str> = pkg.split('.').filter(|s| !s.is_empty()).collect();
45 let module = compute_module_moniker(anchor, uri, &pieces);
46 let (def_cap, ref_cap) = CodeGraph::capacity_for_source(source.len());
47 let mut graph = CodeGraph::with_capacity(module.clone(), kinds::MODULE, def_cap, ref_cap);
48 let mut type_table: HashMap<&[u8], Moniker> = HashMap::new();
49 collect_type_table(
50 tree.root_node(),
51 source.as_bytes(),
52 &module,
53 &mut type_table,
54 );
55 let mut callable_table: HashMap<(Moniker, Vec<u8>), Vec<u8>> = HashMap::new();
56 collect_callable_table(
57 tree.root_node(),
58 source.as_bytes(),
59 &module,
60 &mut callable_table,
61 );
62 let strat = Strategy {
63 module: module.clone(),
64 source_bytes: source.as_bytes(),
65 deep,
66 presets,
67 imports: RefCell::new(HashMap::<Vec<u8>, &'static [u8]>::new()),
68 local_scope: RefCell::new(Vec::new()),
69 type_table,
70 callable_table,
71 };
72 let walker = CanonicalWalker::new(&strat, source.as_bytes());
73 walker.walk(tree.root_node(), &module, &mut graph);
74 graph
75}
76
77pub struct Lang;
78
79impl crate::lang::LangExtractor for Lang {
80 type Presets = Presets;
81 const LANG_TAG: &'static str = "java";
82 const ALLOWED_KINDS: &'static [&'static str] = &[
83 "class",
84 "interface",
85 "enum",
86 "record",
87 "annotation_type",
88 "method",
89 "constructor",
90 "field",
91 "enum_constant",
92 ];
93 const ALLOWED_VISIBILITIES: &'static [&'static str] =
94 &["public", "protected", "package", "private"];
95
96 fn extract(
97 uri: &str,
98 source: &str,
99 anchor: &Moniker,
100 deep: bool,
101 presets: &Self::Presets,
102 ) -> CodeGraph {
103 extract(uri, source, anchor, deep, presets)
104 }
105}
106
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::moniker::MonikerBuilder;
    use crate::lang::assert_conformance;

    /// Project-level anchor (`app`) used by every test.
    fn make_anchor() -> Moniker {
        MonikerBuilder::new().project(b"app").build()
    }

    /// Runs `extract` with default presets and checks conformance before
    /// handing the graph back to the test.
    fn extract_default(uri: &str, source: &str, anchor: &Moniker, deep: bool) -> CodeGraph {
        let presets = Presets::default();
        let graph = extract(uri, source, anchor, deep, &presets);
        assert_conformance::<super::Lang>(&graph, anchor);
        graph
    }

    // --- parsing and module identity ---

    #[test]
    fn parse_empty_returns_program() {
        let tree = parse("");
        let root = tree.root_node();
        assert_eq!(root.kind(), "program");
    }

    #[test]
    fn extract_emits_comment_def_per_comment_node() {
        let source = "// a\n/* b */\nclass Foo {}\n";
        let graph = extract_default("src/Foo.java", source, &make_anchor(), false);
        let comment_defs = graph.defs().filter(|def| def.kind == b"comment").count();
        assert_eq!(comment_defs, 2);
    }

    #[test]
    fn extract_module_uses_package_decl_and_class_filename() {
        let source = "package com.acme;\nclass Foo {}\n";
        let graph = extract_default("src/Foo.java", source, &make_anchor(), false);
        let expected_root = MonikerBuilder::new()
            .project(b"app")
            .segment(b"lang", b"java")
            .segment(b"package", b"com")
            .segment(b"package", b"acme")
            .segment(b"module", b"Foo")
            .build();
        assert_eq!(graph.root(), &expected_root);
    }

    #[test]
    fn extract_default_package_skips_package_segments() {
        let graph = extract_default("Foo.java", "class Foo {}", &make_anchor(), false);
        let expected_root = MonikerBuilder::new()
            .project(b"app")
            .segment(b"lang", b"java")
            .segment(b"module", b"Foo")
            .build();
        assert_eq!(graph.root(), &expected_root);
    }

    // --- definitions ---

    #[test]
    fn extract_class_emits_class_def_with_package_visibility_default() {
        let graph = extract_default("Foo.java", "class Foo {}", &make_anchor(), false);
        let class_def = graph
            .defs()
            .find(|def| def.kind == b"class")
            .expect("class def");
        assert_eq!(class_def.visibility, b"package".to_vec());
    }

    #[test]
    fn extract_class_with_public_modifier_carries_visibility_public() {
        let graph = extract_default("Foo.java", "public class Foo {}", &make_anchor(), false);
        let class_def = graph.defs().find(|def| def.kind == b"class").unwrap();
        assert_eq!(class_def.visibility, b"public".to_vec());
    }

    #[test]
    fn extract_method_signature_in_moniker_and_signature_column() {
        let source = r#"
 public class Foo {
 public int findById(int id, String name) { return id; }
 }
 "#;
        let graph = extract_default("Foo.java", source, &make_anchor(), false);
        let method = graph
            .defs()
            .find(|def| def.kind == b"method")
            .expect("method def");
        let tail = method.moniker.as_view().segments().last().unwrap();
        assert_eq!(tail.kind, b"method");
        assert_eq!(tail.name, b"findById(id:int,name:String)");
        assert_eq!(method.signature, b"id:int,name:String".to_vec());
        assert_eq!(method.visibility, b"public".to_vec());
    }

    #[test]
    fn extract_constructor_uses_constructor_kind() {
        let source = r#"
 public class Foo {
 public Foo(int x) {}
 }
 "#;
        let graph = extract_default("Foo.java", source, &make_anchor(), false);
        let has_constructor = graph.defs().any(|def| def.kind == b"constructor");
        assert!(has_constructor);
    }

    #[test]
    fn extract_field_one_def_per_declarator() {
        let source = "class Foo { int a, b; private String name; }";
        let graph = extract_default("Foo.java", source, &make_anchor(), false);
        let field_defs: Vec<_> = graph.defs().filter(|def| def.kind == b"field").collect();
        assert_eq!(
            field_defs.len(),
            3,
            "got {:?}",
            field_defs.iter().map(|def| &def.moniker).collect::<Vec<_>>()
        );
        let name_field = field_defs
            .iter()
            .find(|def| def.moniker.as_view().segments().last().unwrap().name == b"name")
            .unwrap();
        assert_eq!(name_field.visibility, b"private".to_vec());
    }

    #[test]
    fn extract_enum_emits_enum_constants() {
        let graph = extract_default(
            "Color.java",
            "public enum Color { RED, GREEN }",
            &make_anchor(),
            false,
        );
        let red_constant = MonikerBuilder::new()
            .project(b"app")
            .segment(b"lang", b"java")
            .segment(b"module", b"Color")
            .segment(b"enum", b"Color")
            .segment(b"enum_constant", b"RED")
            .build();
        assert!(
            graph.contains(&red_constant),
            "missing RED, defs: {:?}",
            graph.def_monikers()
        );
    }

    #[test]
    fn extract_record_emits_record_def() {
        let graph = extract_default(
            "Point.java",
            "public record Point(int x, int y) {}",
            &make_anchor(),
            false,
        );
        let record_def = graph
            .defs()
            .find(|def| def.kind == b"record")
            .expect("record def");
        assert_eq!(record_def.visibility, b"public".to_vec());
    }

    // --- inheritance and imports ---

    #[test]
    fn extract_extends_and_implements_emit_refs() {
        let source = r#"
 public class A extends B implements I, J {}
 "#;
        let graph = extract_default("A.java", source, &make_anchor(), false);
        let ref_kinds: Vec<&[u8]> = graph.refs().map(|r| r.kind.as_slice()).collect();
        let count_kind =
            |wanted: &[u8]| ref_kinds.iter().filter(|kind| **kind == wanted).count();
        assert_eq!(count_kind(b"extends"), 1);
        assert_eq!(count_kind(b"implements"), 2);
    }

    #[test]
    fn extract_named_jdk_import_marks_external() {
        let source = "import java.util.List;\nclass Foo {}";
        let graph = extract_default("Foo.java", source, &make_anchor(), false);
        let import_ref = graph
            .refs()
            .find(|r| r.kind == b"imports_symbol")
            .expect("imports_symbol ref");
        assert_eq!(import_ref.confidence, b"external".to_vec());
    }

    #[test]
    fn extract_wildcard_import_emits_imports_module() {
        let source = "import com.acme.*;\nclass Foo {}";
        let graph = extract_default("Foo.java", source, &make_anchor(), false);
        let import_ref = graph
            .refs()
            .find(|r| r.kind == b"imports_module")
            .expect("imports_module ref");
        assert_eq!(import_ref.confidence, b"imported".to_vec());
    }

    // --- method calls: receiver hints and confidence ---

    #[test]
    fn extract_method_call_carries_receiver_hint() {
        let source = r#"
 class Foo {
 void m() { this.bar(); }
 void bar() {}
 }
 "#;
        let graph = extract_default("Foo.java", source, &make_anchor(), false);
        let call = graph
            .refs()
            .find(|r| r.kind == b"method_call")
            .expect("method_call ref");
        assert_eq!(call.receiver_hint, b"this".to_vec());
    }

    #[test]
    fn extract_method_call_receiver_hint_carries_identifier_text() {
        let source = r#"
 class Foo {
 void m() { obj.bar(); }
 }
 "#;
        let graph = extract_default("Foo.java", source, &make_anchor(), false);
        let call = graph
            .refs()
            .find(|r| r.kind == b"method_call")
            .expect("method_call ref");
        assert_eq!(
            call.receiver_hint,
            b"obj".to_vec(),
            "receiver hint must carry the local identifier text",
        );
    }

    #[test]
    fn method_call_on_imported_class_carries_imported_confidence() {
        let source = r#"
 import com.acme.Util;
 class Foo {
 void m() { Util.run(); }
 }
 "#;
        let graph = extract_default("src/Foo.java", source, &make_anchor(), false);
        let call = graph
            .refs()
            .find(|r| r.kind == b"method_call" && r.receiver_hint == b"Util")
            .expect("method_call on Util");
        assert_eq!(call.confidence, b"imported");
    }

    #[test]
    fn method_call_on_jdk_class_carries_external_confidence() {
        let source = r#"
 import java.util.List;
 class Foo {
 void m() { List.of(); }
 }
 "#;
        let graph = extract_default("src/Foo.java", source, &make_anchor(), false);
        let call = graph
            .refs()
            .find(|r| r.kind == b"method_call" && r.receiver_hint == b"List")
            .expect("method_call on List");
        assert_eq!(call.confidence, b"external");
    }

    #[test]
    fn method_call_on_non_imported_identifier_stays_name_match() {
        let source = r#"
 class Foo {
 void m() { obj.bar(); }
 }
 "#;
        let graph = extract_default("src/Foo.java", source, &make_anchor(), false);
        let call = graph
            .refs()
            .find(|r| r.kind == b"method_call" && r.receiver_hint == b"obj")
            .expect("method_call on obj");
        assert_eq!(call.confidence, b"name_match");
    }

    // --- call target resolution ---

    #[test]
    fn this_call_resolves_to_full_slot_signature() {
        let source = r#"
 class Foo {
 void m() { this.bar(); }
 void bar() {}
 }
 "#;
        let graph = extract_default("Foo.java", source, &make_anchor(), false);
        let call = graph
            .refs()
            .find(|r| r.kind == b"method_call")
            .expect("method_call ref");
        let tail = call.target.as_view().segments().last().unwrap();
        assert_eq!(tail.kind, b"method");
        assert_eq!(
            tail.name, b"bar()",
            "this.bar() must resolve to the def's slot signature, not to a name-only fallback"
        );
    }

    #[test]
    fn bare_call_to_same_class_method_resolves_to_full_slot_signature() {
        let source = r#"
 class Foo {
 void m() { bar(1, "x"); }
 int bar(int n, String s) { return n; }
 }
 "#;
        let graph = extract_default("Foo.java", source, &make_anchor(), false);
        let call = graph
            .refs()
            .find(|r| r.kind == b"calls")
            .expect("calls ref");
        let tail = call.target.as_view().segments().last().unwrap();
        assert_eq!(tail.name, b"bar(n:int,s:String)");
    }

    #[test]
    fn method_call_on_unresolved_receiver_falls_back_to_name_only() {
        let source = r#"
 class Foo {
 void m() { obj.bar(1); }
 }
 "#;
        let graph = extract_default("Foo.java", source, &make_anchor(), false);
        let call = graph
            .refs()
            .find(|r| r.kind == b"method_call")
            .expect("method_call ref");
        let tail = call.target.as_view().segments().last().unwrap();
        assert_eq!(
            tail.name, b"bar",
            "unresolved receiver must produce a name-only target (no parens, no arity)"
        );
    }

    // --- instantiation, annotations, type resolution ---

    #[test]
    fn extract_object_creation_emits_instantiates() {
        let source = r#"
 class Foo {
 Object m() { return new Bar(); }
 }
 "#;
        let graph = extract_default("Foo.java", source, &make_anchor(), false);
        let creation = graph
            .refs()
            .find(|r| r.kind == b"instantiates")
            .expect("instantiates ref");
        let tail = creation.target.as_view().segments().last().unwrap();
        assert_eq!(tail.kind, b"class");
        assert_eq!(tail.name, b"Bar");
    }

    #[test]
    fn extract_annotation_on_class_emits_annotates() {
        let source = "@Deprecated public class Foo {}";
        let graph = extract_default("Foo.java", source, &make_anchor(), false);
        let has_annotates = graph.refs().any(|r| r.kind == b"annotates");
        assert!(has_annotates);
    }

    #[test]
    fn extract_imported_call_marks_confidence_imported() {
        let source = r#"
 import com.acme.Helpers;
 class Foo { void m() { Helpers.go(); } }
 "#;
        let graph = extract_default("Foo.java", source, &make_anchor(), false);
        let helpers_read = graph.refs().find(|r| {
            r.kind == b"reads" && r.target.as_view().segments().last().unwrap().name == b"Helpers"
        });
        // The confidence is only checked when such a `reads` ref exists; its
        // absence does not fail this test.
        if let Some(read) = helpers_read {
            assert_eq!(read.confidence, b"imported".to_vec());
        }
    }

    #[test]
    fn extract_same_file_type_resolves_with_real_target() {
        let source = r#"
 class Bar {}
 class Foo {
 Bar b;
 Object m() { return new Bar(); }
 }
 "#;
        let graph = extract_default("Foo.java", source, &make_anchor(), false);

        let bar_moniker = MonikerBuilder::new()
            .project(b"app")
            .segment(b"lang", b"java")
            .segment(b"module", b"Foo")
            .segment(b"class", b"Bar")
            .build();

        let type_use = graph
            .refs()
            .find(|r| r.kind == b"uses_type" && r.target == bar_moniker)
            .expect("uses_type ref to Bar");
        assert_eq!(type_use.confidence, b"resolved".to_vec());

        let creation = graph
            .refs()
            .find(|r| r.kind == b"instantiates" && r.target == bar_moniker)
            .expect("instantiates ref to Bar");
        assert_eq!(creation.confidence, b"resolved".to_vec());
    }

    #[test]
    fn extract_nested_type_resolves_via_table() {
        let source = r#"
 class Outer {
 static class Inner {}
 Inner make() { return new Inner(); }
 }
 "#;
        let graph = extract_default("Outer.java", source, &make_anchor(), false);
        let inner_moniker = MonikerBuilder::new()
            .project(b"app")
            .segment(b"lang", b"java")
            .segment(b"module", b"Outer")
            .segment(b"class", b"Outer")
            .segment(b"class", b"Inner")
            .build();
        let creation = graph
            .refs()
            .find(|r| r.kind == b"instantiates" && r.target == inner_moniker)
            .expect("instantiates Inner");
        assert_eq!(creation.confidence, b"resolved".to_vec());
    }

    // --- deep mode: locals and params ---

    #[test]
    fn extract_deep_catch_param_emits_local_def() {
        let source = r#"
 class Foo {
 void m() { try {} catch (IOException e) { e.toString(); } }
 }
 "#;
        let graph = extract_default("Foo.java", source, &make_anchor(), true);
        let def_monikers = graph.def_monikers();
        let has_catch_param = def_monikers.iter().any(|moniker| {
            let tail = moniker.as_view().segments().last().unwrap();
            tail.kind == b"param" && tail.name == b"e"
        });
        assert!(
            has_catch_param,
            "catch param should be emitted as a param def in deep mode"
        );
    }

    #[test]
    fn extract_deep_enhanced_for_var_is_local() {
        let source = r#"
 class Foo {
 void m(java.util.List<String> xs) { for (String x : xs) { x.length(); } }
 }
 "#;
        let graph = extract_default("Foo.java", source, &make_anchor(), true);
        let has_loop_local = graph.defs().any(|def| {
            def.kind == b"local"
                && def.moniker.as_view().segments().last().unwrap().name == b"x"
        });
        assert!(has_loop_local, "enhanced-for var should be a local def");
    }

    #[test]
    fn extract_lambda_param_marks_reads_as_local() {
        let source = r#"
 class Foo {
 java.util.function.BinaryOperator<Integer> add = (a, b) -> a + b;
 }
 "#;
        let graph = extract_default("Foo.java", source, &make_anchor(), true);
        let lambda_read = graph
            .refs()
            .find(|r| {
                r.kind == b"reads" && r.target.as_view().segments().last().unwrap().name == b"a"
            })
            .expect("reads a inside lambda");
        assert_eq!(lambda_read.confidence, b"local".to_vec());
    }

    #[test]
    fn extract_param_read_marks_confidence_local() {
        let source = r#"
 class Foo { int m(int x) { return x; } }
 "#;
        let graph = extract_default("Foo.java", source, &make_anchor(), true);
        let param_read = graph
            .refs()
            .find(|r| r.kind == b"reads")
            .expect("reads ref");
        assert_eq!(param_read.confidence, b"local".to_vec());
    }
}