Skip to main content

code_moniker_core/lang/
canonical_walker.rs

1use tree_sitter::Node;
2
3use crate::core::code_graph::{CodeGraph, DefAttrs, RefAttrs};
4use crate::core::moniker::Moniker;
5
6use crate::lang::callable::extend_segment_u32;
7use crate::lang::strategy::{LangStrategy, NodeShape};
8
9pub struct CanonicalWalker<'a, S: LangStrategy> {
10	pub strategy: &'a S,
11	pub source: &'a [u8],
12}
13
/// A run of adjacent annotation nodes that has not been written to the graph
/// yet. The walker keeps at most one of these open while it scans siblings.
struct PendingAnnotation {
	/// Annotation kind shared by every node merged into this run.
	kind: &'static [u8],
	/// Start byte of the first node in the run.
	start_byte: u32,
	/// End byte of the most recently merged node.
	end_byte: u32,
	/// Row on which the most recently merged node ends; compared against the
	/// start row of the next annotation to decide whether it joins the run.
	end_row: usize,
}
20
21impl<'a, S: LangStrategy> CanonicalWalker<'a, S> {
22	pub fn new(strategy: &'a S, source: &'a [u8]) -> Self {
23		Self { strategy, source }
24	}
25
26	pub fn walk(&self, node: Node<'_>, scope: &Moniker, graph: &mut CodeGraph) {
27		let mut cursor = node.walk();
28		let mut pending: Option<PendingAnnotation> = None;
29		for child in node.children(&mut cursor) {
30			match self.strategy.classify(child, scope, self.source, graph) {
31				NodeShape::Annotation { kind } => {
32					self.extend_or_flush(&mut pending, kind, child, scope, graph);
33				}
34				NodeShape::Symbol(sym) => {
35					self.flush_pending(&mut pending, scope, graph);
36					self.emit_symbol(child, scope, sym, graph);
37				}
38				NodeShape::Skip => self.flush_pending(&mut pending, scope, graph),
39				NodeShape::Recurse => {
40					self.flush_pending(&mut pending, scope, graph);
41					self.walk(child, scope, graph);
42				}
43			}
44		}
45		self.flush_pending(&mut pending, scope, graph);
46	}
47
48	fn extend_or_flush(
49		&self,
50		pending: &mut Option<PendingAnnotation>,
51		kind: &'static [u8],
52		child: Node<'_>,
53		scope: &Moniker,
54		graph: &mut CodeGraph,
55	) {
56		let start_row = child.start_position().row;
57		let end_row = child.end_position().row;
58		let start_byte = child.start_byte() as u32;
59		let end_byte = child.end_byte() as u32;
60		if let Some(p) = pending.as_mut() {
61			if p.kind == kind && start_row <= p.end_row + 1 {
62				p.end_byte = end_byte;
63				p.end_row = end_row;
64				return;
65			}
66			self.emit_annotation_range(p.kind, p.start_byte, p.end_byte, scope, graph);
67		}
68		*pending = Some(PendingAnnotation {
69			kind,
70			start_byte,
71			end_byte,
72			end_row,
73		});
74	}
75
76	fn flush_pending(
77		&self,
78		pending: &mut Option<PendingAnnotation>,
79		scope: &Moniker,
80		graph: &mut CodeGraph,
81	) {
82		if let Some(p) = pending.take() {
83			self.emit_annotation_range(p.kind, p.start_byte, p.end_byte, scope, graph);
84		}
85	}
86
87	pub fn dispatch(&self, node: Node<'_>, scope: &Moniker, graph: &mut CodeGraph) {
88		match self.strategy.classify(node, scope, self.source, graph) {
89			NodeShape::Annotation { kind } => {
90				self.emit_annotation_range(
91					kind,
92					node.start_byte() as u32,
93					node.end_byte() as u32,
94					scope,
95					graph,
96				);
97			}
98			NodeShape::Symbol(sym) => {
99				self.emit_symbol(node, scope, sym, graph);
100			}
101			NodeShape::Skip => {}
102			NodeShape::Recurse => self.walk(node, scope, graph),
103		}
104	}
105
106	fn emit_annotation_range(
107		&self,
108		kind: &'static [u8],
109		start_byte: u32,
110		end_byte: u32,
111		scope: &Moniker,
112		graph: &mut CodeGraph,
113	) {
114		let m = extend_segment_u32(scope, kind, start_byte);
115		let _ = graph.add_def(m, kind, scope, Some((start_byte, end_byte)));
116	}
117
118	fn emit_symbol(
119		&self,
120		node: Node<'_>,
121		scope: &Moniker,
122		sym: crate::lang::strategy::Symbol<'_>,
123		graph: &mut CodeGraph,
124	) {
125		let crate::lang::strategy::Symbol {
126			moniker: m,
127			kind,
128			visibility,
129			signature,
130			body,
131			position,
132			annotated_by,
133		} = sym;
134
135		let attrs = DefAttrs {
136			visibility,
137			signature: signature.as_deref().unwrap_or_default(),
138			..DefAttrs::default()
139		};
140		let added = graph
141			.add_def_attrs(m.clone(), kind, scope, Some(position), &attrs)
142			.is_ok();
143		if !added {
144			return;
145		}
146
147		for r in annotated_by {
148			let attrs = RefAttrs {
149				confidence: r.confidence,
150				receiver_hint: r.receiver_hint,
151				alias: r.alias,
152				..RefAttrs::default()
153			};
154			let _ = graph.add_ref_attrs(&m, r.target, r.kind, Some(r.position), &attrs);
155		}
156
157		if let Some(body_node) = body {
158			self.strategy
159				.before_body(node, kind, &m, self.source, graph);
160			self.walk(body_node, &m, graph);
161			self.strategy.after_body(kind, &m);
162		}
163
164		self.strategy
165			.on_symbol_emitted(node, kind, &m, self.source, graph);
166	}
167}
168
#[cfg(test)]
mod tests {
	use super::*;
	use crate::core::moniker::MonikerBuilder;
	use crate::lang::strategy::{NodeShape, Symbol};

	/// Minimal strategy for the Rust grammar: comments become annotations,
	/// named structs and functions become symbols, everything else recurses.
	struct RustToyStrategy;

	/// Builds the symbol shape for a named item of the given `kind`, or falls
	/// back to `Recurse` when the node has no `name` field.
	fn named_item<'src>(
		node: Node<'src>,
		scope: &Moniker,
		source: &'src [u8],
		kind: &'static [u8],
	) -> NodeShape<'src> {
		let Some(name) = node.child_by_field_name("name") else {
			return NodeShape::Recurse;
		};
		let ident = &source[name.start_byte()..name.end_byte()];
		let moniker = MonikerBuilder::from_view(scope.as_view())
			.segment(kind, ident)
			.build();
		NodeShape::Symbol(Symbol {
			moniker,
			kind,
			visibility: b"public",
			signature: None,
			body: node.child_by_field_name("body"),
			position: (node.start_byte() as u32, node.end_byte() as u32),
			annotated_by: Vec::new(),
		})
	}

	impl LangStrategy for RustToyStrategy {
		fn classify<'src>(
			&self,
			node: Node<'src>,
			scope: &Moniker,
			source: &'src [u8],
			_graph: &mut CodeGraph,
		) -> NodeShape<'src> {
			match node.kind() {
				"line_comment" | "block_comment" => NodeShape::Annotation { kind: b"comment" },
				"struct_item" => named_item(node, scope, source, b"struct"),
				"function_item" => named_item(node, scope, source, b"fn"),
				_ => NodeShape::Recurse,
			}
		}
	}

	/// Root moniker every test graph is anchored at.
	fn anchor() -> Moniker {
		MonikerBuilder::new()
			.project(b"app")
			.segment(b"lang", b"rs")
			.segment(b"module", b"toy")
			.build()
	}

	/// Parses `src` as Rust, walks the whole tree with the toy strategy and
	/// returns the populated graph.
	fn graph_for(src: &[u8]) -> CodeGraph {
		let mut parser = tree_sitter::Parser::new();
		parser
			.set_language(&tree_sitter_rust::LANGUAGE.into())
			.unwrap();
		let tree = parser.parse(src, None).unwrap();

		let root = anchor();
		let mut graph = CodeGraph::new(root.clone(), b"module");
		let walker = CanonicalWalker::new(&RustToyStrategy, src);
		walker.walk(tree.root_node(), &root, &mut graph);
		graph
	}

	/// Number of defs in `graph` whose kind is `comment`.
	fn comment_count(graph: &CodeGraph) -> usize {
		graph.defs().filter(|d| d.kind == b"comment").count()
	}

	#[test]
	fn canonical_walker_emits_struct_and_fn_via_strategy() {
		let g = graph_for(b"pub struct Foo;\npub fn bar() {}");

		let kinds: Vec<&[u8]> = g.defs().map(|d| d.kind.as_slice()).collect();
		assert!(kinds.contains(&b"struct".as_slice()));
		assert!(kinds.contains(&b"fn".as_slice()));
	}

	#[test]
	fn canonical_walker_emits_comments_at_top_level() {
		let g = graph_for(b"// hi\npub struct Foo;");

		assert_eq!(comment_count(&g), 1);
	}

	#[test]
	fn canonical_walker_recurses_into_struct_body_and_finds_inner_comments() {
		let g = graph_for(b"pub struct Foo {\n    // hi\n    x: i32,\n}");

		let comment_under_struct = g
			.defs()
			.filter(|d| d.kind == b"comment")
			.any(|comment| {
				comment
					.moniker
					.as_view()
					.segments()
					.any(|seg| seg.kind == b"struct" && seg.name == b"Foo")
			});
		assert!(
			comment_under_struct,
			"comment inside struct body should be re-parented onto the struct"
		);
	}

	#[test]
	fn canonical_walker_collapses_consecutive_line_comments_into_one_def() {
		let src = b"// a\n// b\n// c\npub struct Foo;";
		let g = graph_for(src);

		let comments: Vec<_> = g.defs().filter(|d| d.kind == b"comment").collect();
		assert_eq!(
			comments.len(),
			1,
			"three adjacent line comments collapse to one def"
		);
		let (start, end) = comments[0].position.expect("comment has a position");
		assert_eq!(
			&src[start as usize..end as usize],
			b"// a\n// b\n// c".as_slice(),
			"collapsed span covers the whole run"
		);
	}

	#[test]
	fn canonical_walker_splits_comments_separated_by_blank_line() {
		let g = graph_for(b"// a\n// b\n\n// c\npub struct Foo;");

		assert_eq!(comment_count(&g), 2, "a blank line breaks the run");
	}

	#[test]
	fn canonical_walker_splits_comments_separated_by_code() {
		let g = graph_for(b"// a\npub struct Foo;\n// b\npub struct Bar;");

		assert_eq!(
			comment_count(&g),
			2,
			"code between two comments forces two separate defs"
		);
	}

	#[test]
	fn canonical_walker_does_not_drop_comments_in_mod_inline_position() {
		let g = graph_for(b"pub mod inner {\n    // inside\n    pub struct InnerStruct;\n}");

		assert_eq!(
			comment_count(&g),
			1,
			"default-recurse must reach into mod_item; comment inside must be emitted"
		);
		assert!(
			g.defs().any(|d| d.kind == b"struct"),
			"the inner struct must also be emitted because the walker recursed"
		);
	}
}