// code-moniker-core 0.2.0
//
// Core symbol-graph types and per-language extractors for code-moniker (pure Rust, no pgrx). Consumed by the CLI and the PostgreSQL extension.
// Documentation
use tree_sitter::Node;

use crate::core::code_graph::{CodeGraph, DefAttrs, RefAttrs};
use crate::core::moniker::Moniker;

use crate::lang::callable::extend_segment_u32;
use crate::lang::strategy::{LangStrategy, NodeShape};

/// Tree walker that is generic over a [`LangStrategy`].
///
/// The walker itself holds no mutable state: it only borrows the strategy
/// it consults for every node and the source bytes it forwards to it.
pub struct CanonicalWalker<'a, S>
where
	S: LangStrategy,
{
	/// Language-specific classifier and hooks consulted while walking.
	pub strategy: &'a S,
	/// Raw source bytes handed to the strategy alongside each node.
	pub source: &'a [u8],
}

/// A run of adjacent annotation nodes of one kind that has been seen but not
/// yet written to the graph.
///
/// `CanonicalWalker::walk` keeps at most one of these at a time; it is grown
/// by `extend_or_flush` and emitted as a single def when the run ends.
struct PendingAnnotation {
	/// Annotation kind shared by every node merged into this run.
	kind: &'static [u8],
	/// Start byte of the first node in the run.
	start_byte: u32,
	/// End byte of the most recently merged node.
	end_byte: u32,
	/// End row of the most recently merged node; compared against the start
	/// row of the next annotation to decide whether it continues the run.
	end_row: usize,
}

impl<'a, S: LangStrategy> CanonicalWalker<'a, S> {
	pub fn new(strategy: &'a S, source: &'a [u8]) -> Self {
		Self { strategy, source }
	}

	pub fn walk(&self, node: Node<'_>, scope: &Moniker, graph: &mut CodeGraph) {
		let mut cursor = node.walk();
		let mut pending: Option<PendingAnnotation> = None;
		for child in node.children(&mut cursor) {
			match self.strategy.classify(child, scope, self.source, graph) {
				NodeShape::Annotation { kind } => {
					self.extend_or_flush(&mut pending, kind, child, scope, graph);
				}
				NodeShape::Symbol(sym) => {
					self.flush_pending(&mut pending, scope, graph);
					self.emit_symbol(child, scope, sym, graph);
				}
				NodeShape::Skip => self.flush_pending(&mut pending, scope, graph),
				NodeShape::Recurse => {
					self.flush_pending(&mut pending, scope, graph);
					self.walk(child, scope, graph);
				}
			}
		}
		self.flush_pending(&mut pending, scope, graph);
	}

	fn extend_or_flush(
		&self,
		pending: &mut Option<PendingAnnotation>,
		kind: &'static [u8],
		child: Node<'_>,
		scope: &Moniker,
		graph: &mut CodeGraph,
	) {
		let start_row = child.start_position().row;
		let end_row = child.end_position().row;
		let start_byte = child.start_byte() as u32;
		let end_byte = child.end_byte() as u32;
		if let Some(p) = pending.as_mut() {
			if p.kind == kind && start_row <= p.end_row + 1 {
				p.end_byte = end_byte;
				p.end_row = end_row;
				return;
			}
			self.emit_annotation_range(p.kind, p.start_byte, p.end_byte, scope, graph);
		}
		*pending = Some(PendingAnnotation {
			kind,
			start_byte,
			end_byte,
			end_row,
		});
	}

	fn flush_pending(
		&self,
		pending: &mut Option<PendingAnnotation>,
		scope: &Moniker,
		graph: &mut CodeGraph,
	) {
		if let Some(p) = pending.take() {
			self.emit_annotation_range(p.kind, p.start_byte, p.end_byte, scope, graph);
		}
	}

	pub fn dispatch(&self, node: Node<'_>, scope: &Moniker, graph: &mut CodeGraph) {
		match self.strategy.classify(node, scope, self.source, graph) {
			NodeShape::Annotation { kind } => {
				self.emit_annotation_range(
					kind,
					node.start_byte() as u32,
					node.end_byte() as u32,
					scope,
					graph,
				);
			}
			NodeShape::Symbol(sym) => {
				self.emit_symbol(node, scope, sym, graph);
			}
			NodeShape::Skip => {}
			NodeShape::Recurse => self.walk(node, scope, graph),
		}
	}

	fn emit_annotation_range(
		&self,
		kind: &'static [u8],
		start_byte: u32,
		end_byte: u32,
		scope: &Moniker,
		graph: &mut CodeGraph,
	) {
		let m = extend_segment_u32(scope, kind, start_byte);
		let _ = graph.add_def(m, kind, scope, Some((start_byte, end_byte)));
	}

	fn emit_symbol(
		&self,
		node: Node<'_>,
		scope: &Moniker,
		sym: crate::lang::strategy::Symbol<'_>,
		graph: &mut CodeGraph,
	) {
		let crate::lang::strategy::Symbol {
			moniker: m,
			kind,
			visibility,
			signature,
			body,
			position,
			annotated_by,
		} = sym;

		let attrs = DefAttrs {
			visibility,
			signature: signature.as_deref().unwrap_or_default(),
			..DefAttrs::default()
		};
		let added = graph
			.add_def_attrs(m.clone(), kind, scope, Some(position), &attrs)
			.is_ok();
		if !added {
			return;
		}

		for r in annotated_by {
			let attrs = RefAttrs {
				confidence: r.confidence,
				receiver_hint: r.receiver_hint,
				alias: r.alias,
				..RefAttrs::default()
			};
			let _ = graph.add_ref_attrs(&m, r.target, r.kind, Some(r.position), &attrs);
		}

		if let Some(body_node) = body {
			self.strategy
				.before_body(node, kind, &m, self.source, graph);
			self.walk(body_node, &m, graph);
			self.strategy.after_body(kind, &m);
		}

		self.strategy
			.on_symbol_emitted(node, kind, &m, self.source, graph);
	}
}

#[cfg(test)]
mod tests {
	use super::*;
	use crate::core::moniker::MonikerBuilder;
	use crate::lang::strategy::{NodeShape, Symbol};

	/// Minimal strategy for the Rust grammar: comments are annotations,
	/// named structs and functions are symbols, everything else recurses.
	struct RustToyStrategy;

	/// Builds the symbol shape for a `struct_item` / `function_item` node.
	///
	/// Falls back to `Recurse` when the node has no `name` field.
	fn named_symbol<'src>(
		node: Node<'src>,
		scope: &Moniker,
		source: &'src [u8],
		kind: &'static [u8],
	) -> NodeShape<'src> {
		let Some(name) = node.child_by_field_name("name") else {
			return NodeShape::Recurse;
		};
		let ident = &source[name.start_byte()..name.end_byte()];
		let moniker = MonikerBuilder::from_view(scope.as_view())
			.segment(kind, ident)
			.build();
		NodeShape::Symbol(Symbol {
			moniker,
			kind,
			visibility: b"public",
			signature: None,
			body: node.child_by_field_name("body"),
			position: (node.start_byte() as u32, node.end_byte() as u32),
			annotated_by: Vec::new(),
		})
	}

	impl LangStrategy for RustToyStrategy {
		fn classify<'src>(
			&self,
			node: Node<'src>,
			scope: &Moniker,
			source: &'src [u8],
			_graph: &mut CodeGraph,
		) -> NodeShape<'src> {
			match node.kind() {
				"line_comment" | "block_comment" => NodeShape::Annotation { kind: b"comment" },
				"struct_item" => named_symbol(node, scope, source, b"struct"),
				"function_item" => named_symbol(node, scope, source, b"fn"),
				_ => NodeShape::Recurse,
			}
		}
	}

	/// Root moniker used as the module scope in every test.
	fn anchor() -> Moniker {
		MonikerBuilder::new()
			.project(b"app")
			.segment(b"lang", b"rs")
			.segment(b"module", b"toy")
			.build()
	}

	/// Parses `src` with the Rust grammar, walks the whole tree with
	/// `RustToyStrategy` under `anchor()`, and returns the resulting graph.
	fn walk_source(src: &[u8]) -> CodeGraph {
		let mut parser = tree_sitter::Parser::new();
		parser
			.set_language(&tree_sitter_rust::LANGUAGE.into())
			.unwrap();
		let tree = parser.parse(src, None).unwrap();

		let root = anchor();
		let mut graph = CodeGraph::new(root.clone(), b"module");
		let walker = CanonicalWalker::new(&RustToyStrategy, src);
		walker.walk(tree.root_node(), &root, &mut graph);
		graph
	}

	#[test]
	fn canonical_walker_emits_struct_and_fn_via_strategy() {
		let graph = walk_source(b"pub struct Foo;\npub fn bar() {}");

		let kinds: Vec<&[u8]> = graph.defs().map(|d| d.kind.as_slice()).collect();
		assert!(kinds.contains(&b"struct".as_slice()));
		assert!(kinds.contains(&b"fn".as_slice()));
	}

	#[test]
	fn canonical_walker_emits_comments_at_top_level() {
		let graph = walk_source(b"// hi\npub struct Foo;");

		assert_eq!(graph.defs().filter(|d| d.kind == b"comment").count(), 1);
	}

	#[test]
	fn canonical_walker_recurses_into_struct_body_and_finds_inner_comments() {
		let graph = walk_source(b"pub struct Foo {\n    // hi\n    x: i32,\n}");

		// Look for a comment def whose moniker passes through `struct Foo`.
		let comment_under_struct = graph.defs().any(|d| {
			d.kind == b"comment"
				&& d.moniker
					.as_view()
					.segments()
					.any(|s| s.kind == b"struct" && s.name == b"Foo")
		});
		assert!(
			comment_under_struct,
			"comment inside struct body should be re-parented onto the struct"
		);
	}

	#[test]
	fn canonical_walker_collapses_consecutive_line_comments_into_one_def() {
		let src = b"// a\n// b\n// c\npub struct Foo;";
		let graph = walk_source(src);

		let comments: Vec<_> = graph.defs().filter(|d| d.kind == b"comment").collect();
		assert_eq!(
			comments.len(),
			1,
			"three adjacent line comments collapse to one def"
		);
		let pos = comments[0].position.expect("comment has a position");
		assert_eq!(
			&src[pos.0 as usize..pos.1 as usize],
			b"// a\n// b\n// c".as_slice(),
			"collapsed span covers the whole run"
		);
	}

	#[test]
	fn canonical_walker_splits_comments_separated_by_blank_line() {
		let graph = walk_source(b"// a\n// b\n\n// c\npub struct Foo;");

		assert_eq!(
			graph.defs().filter(|d| d.kind == b"comment").count(),
			2,
			"a blank line breaks the run"
		);
	}

	#[test]
	fn canonical_walker_splits_comments_separated_by_code() {
		let graph = walk_source(b"// a\npub struct Foo;\n// b\npub struct Bar;");

		assert_eq!(
			graph.defs().filter(|d| d.kind == b"comment").count(),
			2,
			"code between two comments forces two separate defs"
		);
	}

	#[test]
	fn canonical_walker_does_not_drop_comments_in_mod_inline_position() {
		let graph =
			walk_source(b"pub mod inner {\n    // inside\n    pub struct InnerStruct;\n}");

		assert_eq!(
			graph.defs().filter(|d| d.kind == b"comment").count(),
			1,
			"default-recurse must reach into mod_item; comment inside must be emitted"
		);
		assert!(
			graph.defs().any(|d| d.kind == b"struct"),
			"the inner struct must also be emitted because the walker recursed"
		);
	}
}