code-moniker-core 0.2.0

Core symbol-graph types and per-language extractors for code-moniker (pure Rust, no pgrx). Consumed by the CLI and the PostgreSQL extension.
Documentation
use std::cell::RefCell;
use std::collections::HashMap;

use tree_sitter::{Language, Parser, Tree};

use crate::core::code_graph::CodeGraph;
use crate::core::moniker::Moniker;

use crate::lang::canonical_walker::CanonicalWalker;

pub mod build;
mod canonicalize;
mod kinds;
mod strategy;

use canonicalize::{compute_module_moniker, read_package_name};
use strategy::{Strategy, collect_callable_table, collect_type_table};

/// Caller-supplied configuration for Java extraction.
///
/// Derives `Default`, so `Presets::default()` starts with an empty
/// `external_packages` list.
#[derive(Clone, Debug, Default)]
pub struct Presets {
	/// Names of packages flagged as external.
	// NOTE(review): the consumer of this list lives in the strategy module and
	// is not visible here — presumably these are dotted Java package names;
	// confirm the expected format and matching rule.
	pub external_packages: Vec<String>,
}

/// Parses Java `source` with the tree-sitter Java grammar and returns the
/// resulting syntax tree.
///
/// A fresh `Parser` is built on every call and no previous tree is reused.
///
/// # Panics
///
/// Panics if the Java grammar cannot be installed on the parser, or if
/// tree-sitter hands back no tree.
pub fn parse(source: &str) -> Tree {
	let mut java_parser = Parser::new();
	let grammar: Language = tree_sitter_java::LANGUAGE.into();

	// Install the grammar first; a failure here is treated as a broken
	// invariant rather than a recoverable error.
	let installed = java_parser.set_language(&grammar);
	installed.expect("failed to load tree-sitter Java grammar");

	// `None` as the old tree means a from-scratch parse.
	let maybe_tree = java_parser.parse(source, None);
	maybe_tree.expect("tree-sitter parse returned None on a non-cancelled call")
}

/// Builds the `CodeGraph` for a single Java source file.
///
/// The steps run in this order:
/// 1. parse `source` into a syntax tree;
/// 2. read the package name from the root node, split it on `.` (dropping
///    empty pieces) and compute the module moniker from `anchor`, `uri` and
///    those pieces;
/// 3. create the graph rooted at that module moniker, sized from the source
///    length;
/// 4. pre-collect the type table and the callable table from the tree;
/// 5. walk the tree with a `CanonicalWalker` driven by a `Strategy` that owns
///    both tables.
///
/// `deep` and `presets` are forwarded to the strategy unchanged.
///
/// # Panics
///
/// Panics under the same conditions as [`parse`].
pub fn extract(
	uri: &str,
	source: &str,
	anchor: &Moniker,
	deep: bool,
	presets: &Presets,
) -> CodeGraph {
	let tree = parse(source);
	let root = tree.root_node();
	let bytes = source.as_bytes();

	// Module identity is derived from the declared package plus the file URI.
	let pkg = read_package_name(root, bytes);
	let pieces: Vec<&str> = pkg.split('.').filter(|piece| !piece.is_empty()).collect();
	let module = compute_module_moniker(anchor, uri, &pieces);

	let (def_cap, ref_cap) = CodeGraph::capacity_for_source(source.len());
	let mut graph = CodeGraph::with_capacity(module.clone(), kinds::MODULE, def_cap, ref_cap);

	// Both lookup tables are filled before the main walk and then moved into
	// the strategy.
	let mut type_table: HashMap<&[u8], Moniker> = HashMap::new();
	collect_type_table(root, bytes, &module, &mut type_table);

	let mut callable_table: HashMap<(Moniker, Vec<u8>), Vec<u8>> = HashMap::new();
	collect_callable_table(root, bytes, &module, &mut callable_table);

	let strat = Strategy {
		module: module.clone(),
		source_bytes: bytes,
		deep,
		presets,
		imports: RefCell::new(HashMap::<Vec<u8>, &'static [u8]>::new()),
		import_targets: RefCell::new(HashMap::<Vec<u8>, _>::new()),
		local_scope: RefCell::new(Vec::new()),
		type_table,
		callable_table,
	};

	CanonicalWalker::new(&strat, bytes).walk(root, &module, &mut graph);
	graph
}

/// Field-less marker type that carries the Java implementation of
/// `crate::lang::LangExtractor`.
pub struct Lang;

/// Java binding of the `LangExtractor` trait: declares the language tag, the
/// allowed kind and visibility names, and forwards extraction to this
/// module's free function `extract`.
impl crate::lang::LangExtractor for Lang {
	/// Extraction options type used by this extractor.
	type Presets = Presets;
	/// Language tag for this extractor.
	const LANG_TAG: &'static str = "java";
	/// Definition kind names declared as allowed for Java.
	// NOTE(review): the tests in this file also look for `param` and `local`
	// defs in deep mode, which are not listed here — presumably the trait's
	// conformance rules treat those separately; confirm.
	const ALLOWED_KINDS: &'static [&'static str] = &[
		"class",
		"interface",
		"enum",
		"record",
		"annotation_type",
		"method",
		"constructor",
		"field",
		"enum_constant",
	];
	/// Visibility names declared as allowed for Java.
	const ALLOWED_VISIBILITIES: &'static [&'static str] =
		&["public", "protected", "package", "private"];

	/// Delegates to the module-level `extract` with the same arguments.
	fn extract(
		uri: &str,
		source: &str,
		anchor: &Moniker,
		deep: bool,
		presets: &Self::Presets,
	) -> CodeGraph {
		extract(uri, source, anchor, deep, presets)
	}
}

// Unit tests for the Java extractor. Every graph is produced through
// `extract_default`, which also runs the conformance check for `Lang`.
#[cfg(test)]
mod tests {
	use super::*;
	use crate::core::moniker::MonikerBuilder;
	use crate::lang::assert_conformance;

	/// Anchor moniker shared by all tests: project `app`.
	fn make_anchor() -> Moniker {
		MonikerBuilder::new().project(b"app").build()
	}

	/// Runs `extract` with default presets, asserts conformance of the
	/// resulting graph for `Lang` against `anchor`, and returns the graph.
	fn extract_default(uri: &str, source: &str, anchor: &Moniker, deep: bool) -> CodeGraph {
		let g = extract(uri, source, anchor, deep, &Presets::default());
		assert_conformance::<super::Lang>(&g, anchor);
		g
	}

	/// Parsing the empty string still yields a root node of kind `program`.
	#[test]
	fn parse_empty_returns_program() {
		let tree = parse("");
		assert_eq!(tree.root_node().kind(), "program");
	}

	/// With no `package` declaration, the graph root is
	/// project / lang:java / module:Foo, with no package segments in between.
	#[test]
	fn extract_default_package_skips_package_segments() {
		let g = extract_default("Foo.java", "class Foo {}", &make_anchor(), false);
		let expected = MonikerBuilder::new()
			.project(b"app")
			.segment(b"lang", b"java")
			.segment(b"module", b"Foo")
			.build();
		assert_eq!(g.root(), &expected);
	}

	/// A class declared without a visibility modifier is recorded with
	/// visibility `package`.
	#[test]
	fn extract_class_emits_class_def_with_package_visibility_default() {
		let g = extract_default("Foo.java", "class Foo {}", &make_anchor(), false);
		let foo = g.defs().find(|d| d.kind == b"class").expect("class def");
		assert_eq!(foo.visibility, b"package".to_vec());
	}

	/// `int a, b;` contributes two field defs and `private String name;` a
	/// third; the `name` field carries visibility `private`.
	#[test]
	fn extract_field_one_def_per_declarator() {
		let src = "class Foo { int a, b; private String name; }";
		let g = extract_default("Foo.java", src, &make_anchor(), false);
		let fields: Vec<_> = g.defs().filter(|d| d.kind == b"field").collect();
		assert_eq!(
			fields.len(),
			3,
			"got {:?}",
			fields.iter().map(|d| &d.moniker).collect::<Vec<_>>()
		);
		// Fields are identified by the name of their moniker's last segment.
		let private_field = fields
			.iter()
			.find(|d| d.moniker.as_view().segments().last().unwrap().name == b"name")
			.unwrap();
		assert_eq!(private_field.visibility, b"private".to_vec());
	}

	/// The enum constant `RED` is present in the graph under
	/// module:Color / enum:Color / enum_constant:RED.
	#[test]
	fn extract_enum_emits_enum_constants() {
		let g = extract_default(
			"Color.java",
			"public enum Color { RED, GREEN }",
			&make_anchor(),
			false,
		);
		let red = MonikerBuilder::new()
			.project(b"app")
			.segment(b"lang", b"java")
			.segment(b"module", b"Color")
			.segment(b"enum", b"Color")
			.segment(b"enum_constant", b"RED")
			.build();
		assert!(
			g.contains(&red),
			"missing RED, defs: {:?}",
			g.def_monikers()
		);
	}

	/// A wildcard import produces an `imports_module` ref whose confidence is
	/// `imported`.
	#[test]
	fn extract_wildcard_import_emits_imports_module() {
		let src = "import com.acme.*;\nclass Foo {}";
		let g = extract_default("Foo.java", src, &make_anchor(), false);
		let r = g
			.refs()
			.find(|r| r.kind == b"imports_module")
			.expect("imports_module ref");
		assert_eq!(r.confidence, b"imported".to_vec());
	}

	/// For `this.bar()`, the `method_call` ref records `this` as its receiver
	/// hint.
	#[test]
	fn extract_method_call_carries_receiver_hint() {
		let src = r#"
            class Foo {
                void m() { this.bar(); }
                void bar() {}
            }
        "#;
		let g = extract_default("Foo.java", src, &make_anchor(), false);
		let r = g
			.refs()
			.find(|r| r.kind == b"method_call")
			.expect("method_call ref");
		assert_eq!(r.receiver_hint, b"this".to_vec());
	}

	/// A call whose receiver is an explicitly imported class (`Util`) gets
	/// confidence `imported`.
	#[test]
	fn method_call_on_imported_class_carries_imported_confidence() {
		let src = r#"
            import com.acme.Util;
            class Foo {
                void m() { Util.run(); }
            }
        "#;
		let g = extract_default("src/Foo.java", src, &make_anchor(), false);
		let r = g
			.refs()
			.find(|r| r.kind == b"method_call" && r.receiver_hint == b"Util")
			.expect("method_call on Util");
		assert_eq!(r.confidence, b"imported");
	}

	/// A call on a receiver that is neither imported nor declared (`obj`)
	/// keeps confidence `name_match`.
	#[test]
	fn method_call_on_non_imported_identifier_stays_name_match() {
		let src = r#"
            class Foo {
                void m() { obj.bar(); }
            }
        "#;
		let g = extract_default("src/Foo.java", src, &make_anchor(), false);
		let r = g
			.refs()
			.find(|r| r.kind == b"method_call" && r.receiver_hint == b"obj")
			.expect("method_call on obj");
		assert_eq!(r.confidence, b"name_match");
	}

	/// `this.bar()` targets a `method` segment named `bar()`, i.e. the name
	/// includes the parenthesised signature of the definition.
	#[test]
	fn this_call_resolves_to_full_slot_signature() {
		let src = r#"
            class Foo {
                void m() { this.bar(); }
                void bar() {}
            }
        "#;
		let g = extract_default("Foo.java", src, &make_anchor(), false);
		let r = g
			.refs()
			.find(|r| r.kind == b"method_call")
			.expect("method_call ref");
		let last = r.target.as_view().segments().last().unwrap();
		assert_eq!(last.kind, b"method");
		assert_eq!(
			last.name, b"bar()",
			"this.bar() must resolve to the def's slot signature, not to a name-only fallback"
		);
	}

	/// `obj.bar(1)` on an unresolved receiver targets a segment named plain
	/// `bar`, without parentheses or arity.
	#[test]
	fn method_call_on_unresolved_receiver_falls_back_to_name_only() {
		let src = r#"
            class Foo {
                void m() { obj.bar(1); }
            }
        "#;
		let g = extract_default("Foo.java", src, &make_anchor(), false);
		let r = g
			.refs()
			.find(|r| r.kind == b"method_call")
			.expect("method_call ref");
		let last = r.target.as_view().segments().last().unwrap();
		assert_eq!(
			last.name, b"bar",
			"unresolved receiver must produce a name-only target (no parens, no arity)"
		);
	}

	/// If a `reads` ref targeting `Helpers` exists, its confidence must be
	/// `imported`.
	// NOTE(review): the assertion only runs when such a ref is found; if the
	// extractor emits no matching `reads` ref this test passes without
	// checking anything — confirm that the lenient form is intended.
	#[test]
	fn extract_imported_call_marks_confidence_imported() {
		let src = r#"
            import com.acme.Helpers;
            class Foo { void m() { Helpers.go(); } }
        "#;
		let g = extract_default("Foo.java", src, &make_anchor(), false);
		let reads_helpers = g.refs().find(|r| {
			r.kind == b"reads" && r.target.as_view().segments().last().unwrap().name == b"Helpers"
		});
		if let Some(r) = reads_helpers {
			assert_eq!(r.confidence, b"imported".to_vec());
		}
	}

	/// In deep mode, the catch parameter `e` appears among the def monikers
	/// with a last segment of kind `param`.
	#[test]
	fn extract_deep_catch_param_emits_local_def() {
		let src = r#"
            class Foo {
                void m() { try {} catch (IOException e) { e.toString(); } }
            }
        "#;
		let g = extract_default("Foo.java", src, &make_anchor(), true);
		let monikers = g.def_monikers();
		let e = monikers.iter().find(|m| {
			let last = m.as_view().segments().last().unwrap();
			last.kind == b"param" && last.name == b"e"
		});
		assert!(
			e.is_some(),
			"catch param should be emitted as a param def in deep mode"
		);
	}

	/// In deep mode, the enhanced-for variable `x` is emitted as a def of
	/// kind `local`.
	#[test]
	fn extract_deep_enhanced_for_var_is_local() {
		let src = r#"
            class Foo {
                void m(java.util.List<String> xs) { for (String x : xs) { x.length(); } }
            }
        "#;
		let g = extract_default("Foo.java", src, &make_anchor(), true);
		assert!(
			g.defs().any(|d| d.kind == b"local"
				&& d.moniker.as_view().segments().last().unwrap().name == b"x"),
			"enhanced-for var should be a local def"
		);
	}
}