1use tree_sitter::Node;
2
3use crate::core::code_graph::{CodeGraph, DefAttrs, RefAttrs};
4use crate::core::moniker::Moniker;
5
6use crate::lang::callable::extend_segment_u32;
7use crate::lang::strategy::{LangStrategy, NodeShape};
8
9pub struct CanonicalWalker<'a, S: LangStrategy> {
10 pub strategy: &'a S,
11 pub source: &'a [u8],
12}
13
/// A run of adjacent, same-kind annotation nodes that has been collected but
/// not yet written to the graph.
struct PendingAnnotation {
    /// Annotation kind shared by every node merged into the run.
    kind: &'static [u8],
    /// Byte offset at which the first node of the run starts.
    start_byte: u32,
    /// Byte offset at which the most recently merged node ends.
    end_byte: u32,
    /// Row on which the most recently merged node ends; compared against the
    /// next annotation's start row to decide whether it continues the run.
    end_row: usize,
}
20
21impl<'a, S: LangStrategy> CanonicalWalker<'a, S> {
22 pub fn new(strategy: &'a S, source: &'a [u8]) -> Self {
23 Self { strategy, source }
24 }
25
26 pub fn walk(&self, node: Node<'_>, scope: &Moniker, graph: &mut CodeGraph) {
27 let mut cursor = node.walk();
28 let mut pending: Option<PendingAnnotation> = None;
29 for child in node.children(&mut cursor) {
30 match self.strategy.classify(child, scope, self.source, graph) {
31 NodeShape::Annotation { kind } => {
32 self.extend_or_flush(&mut pending, kind, child, scope, graph);
33 }
34 NodeShape::Symbol(sym) => {
35 self.flush_pending(&mut pending, scope, graph);
36 self.emit_symbol(child, scope, sym, graph);
37 }
38 NodeShape::Skip => self.flush_pending(&mut pending, scope, graph),
39 NodeShape::Recurse => {
40 self.flush_pending(&mut pending, scope, graph);
41 self.walk(child, scope, graph);
42 }
43 }
44 }
45 self.flush_pending(&mut pending, scope, graph);
46 }
47
48 fn extend_or_flush(
49 &self,
50 pending: &mut Option<PendingAnnotation>,
51 kind: &'static [u8],
52 child: Node<'_>,
53 scope: &Moniker,
54 graph: &mut CodeGraph,
55 ) {
56 let start_row = child.start_position().row;
57 let end_row = child.end_position().row;
58 let start_byte = child.start_byte() as u32;
59 let end_byte = child.end_byte() as u32;
60 if let Some(p) = pending.as_mut() {
61 if p.kind == kind && start_row <= p.end_row + 1 {
62 p.end_byte = end_byte;
63 p.end_row = end_row;
64 return;
65 }
66 self.emit_annotation_range(p.kind, p.start_byte, p.end_byte, scope, graph);
67 }
68 *pending = Some(PendingAnnotation {
69 kind,
70 start_byte,
71 end_byte,
72 end_row,
73 });
74 }
75
76 fn flush_pending(
77 &self,
78 pending: &mut Option<PendingAnnotation>,
79 scope: &Moniker,
80 graph: &mut CodeGraph,
81 ) {
82 if let Some(p) = pending.take() {
83 self.emit_annotation_range(p.kind, p.start_byte, p.end_byte, scope, graph);
84 }
85 }
86
87 pub fn dispatch(&self, node: Node<'_>, scope: &Moniker, graph: &mut CodeGraph) {
88 match self.strategy.classify(node, scope, self.source, graph) {
89 NodeShape::Annotation { kind } => {
90 self.emit_annotation_range(
91 kind,
92 node.start_byte() as u32,
93 node.end_byte() as u32,
94 scope,
95 graph,
96 );
97 }
98 NodeShape::Symbol(sym) => {
99 self.emit_symbol(node, scope, sym, graph);
100 }
101 NodeShape::Skip => {}
102 NodeShape::Recurse => self.walk(node, scope, graph),
103 }
104 }
105
106 fn emit_annotation_range(
107 &self,
108 kind: &'static [u8],
109 start_byte: u32,
110 end_byte: u32,
111 scope: &Moniker,
112 graph: &mut CodeGraph,
113 ) {
114 let m = extend_segment_u32(scope, kind, start_byte);
115 let _ = graph.add_def(m, kind, scope, Some((start_byte, end_byte)));
116 }
117
118 fn emit_symbol(
119 &self,
120 node: Node<'_>,
121 scope: &Moniker,
122 sym: crate::lang::strategy::Symbol<'_>,
123 graph: &mut CodeGraph,
124 ) {
125 let crate::lang::strategy::Symbol {
126 moniker: m,
127 kind,
128 visibility,
129 signature,
130 body,
131 position,
132 annotated_by,
133 } = sym;
134
135 let attrs = DefAttrs {
136 visibility,
137 signature: signature.as_deref().unwrap_or_default(),
138 ..DefAttrs::default()
139 };
140 let added = graph
141 .add_def_attrs(m.clone(), kind, scope, Some(position), &attrs)
142 .is_ok();
143 if !added {
144 return;
145 }
146
147 for r in annotated_by {
148 let attrs = RefAttrs {
149 confidence: r.confidence,
150 receiver_hint: r.receiver_hint,
151 alias: r.alias,
152 ..RefAttrs::default()
153 };
154 let _ = graph.add_ref_attrs(&m, r.target, r.kind, Some(r.position), &attrs);
155 }
156
157 if let Some(body_node) = body {
158 self.strategy
159 .before_body(node, kind, &m, self.source, graph);
160 self.walk(body_node, &m, graph);
161 self.strategy.after_body(kind, &m);
162 }
163
164 self.strategy
165 .on_symbol_emitted(node, kind, &m, self.source, graph);
166 }
167}
168
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::moniker::MonikerBuilder;
    use crate::lang::strategy::{NodeShape, Symbol};

    /// Minimal strategy for the Rust grammar: comments become `comment`
    /// annotations, named `struct_item` / `function_item` nodes become
    /// symbols, and every other node is recursed into.
    struct RustToyStrategy;

    impl RustToyStrategy {
        /// Builds a public symbol of kind `tag` named after the node's `name`
        /// field, or asks the walker to recurse when the node has no name.
        fn named_symbol<'src>(
            node: Node<'src>,
            scope: &Moniker,
            source: &'src [u8],
            tag: &'static [u8],
        ) -> NodeShape<'src> {
            let Some(name) = node.child_by_field_name("name") else {
                return NodeShape::Recurse;
            };
            let ident = &source[name.start_byte()..name.end_byte()];
            let moniker = MonikerBuilder::from_view(scope.as_view())
                .segment(tag, ident)
                .build();
            NodeShape::Symbol(Symbol {
                moniker,
                kind: tag,
                visibility: b"public",
                signature: None,
                body: node.child_by_field_name("body"),
                position: (node.start_byte() as u32, node.end_byte() as u32),
                annotated_by: Vec::new(),
            })
        }
    }

    impl LangStrategy for RustToyStrategy {
        fn classify<'src>(
            &self,
            node: Node<'src>,
            scope: &Moniker,
            source: &'src [u8],
            _graph: &mut CodeGraph,
        ) -> NodeShape<'src> {
            match node.kind() {
                "line_comment" | "block_comment" => NodeShape::Annotation { kind: b"comment" },
                "struct_item" => Self::named_symbol(node, scope, source, b"struct"),
                "function_item" => Self::named_symbol(node, scope, source, b"fn"),
                _ => NodeShape::Recurse,
            }
        }
    }

    /// Module moniker used as the root scope in every test.
    fn anchor() -> Moniker {
        MonikerBuilder::new()
            .project(b"app")
            .segment(b"lang", b"rs")
            .segment(b"module", b"toy")
            .build()
    }

    /// Parses `src` as Rust, walks the whole tree with `RustToyStrategy`
    /// from the `anchor()` scope and returns the populated graph.
    fn graph_of(src: &[u8]) -> CodeGraph {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&tree_sitter_rust::LANGUAGE.into())
            .unwrap();
        let tree = parser.parse(src, None).unwrap();

        let root = anchor();
        let mut graph = CodeGraph::new(root.clone(), b"module");
        CanonicalWalker::new(&RustToyStrategy, src).walk(tree.root_node(), &root, &mut graph);
        graph
    }

    /// Number of `comment` defs recorded in `graph`.
    fn comment_count(graph: &CodeGraph) -> usize {
        graph.defs().filter(|d| d.kind == b"comment").count()
    }

    #[test]
    fn canonical_walker_emits_struct_and_fn_via_strategy() {
        let g = graph_of(b"pub struct Foo;\npub fn bar() {}");

        let kinds: Vec<&[u8]> = g.defs().map(|d| d.kind.as_slice()).collect();
        assert!(kinds.contains(&b"struct".as_slice()));
        assert!(kinds.contains(&b"fn".as_slice()));
    }

    #[test]
    fn canonical_walker_emits_comments_at_top_level() {
        let g = graph_of(b"// hi\npub struct Foo;");

        assert_eq!(comment_count(&g), 1);
    }

    #[test]
    fn canonical_walker_recurses_into_struct_body_and_finds_inner_comments() {
        let g = graph_of(b"pub struct Foo {\n // hi\n x: i32,\n}");

        let comment_under_struct = g.defs().filter(|d| d.kind == b"comment").any(|d| {
            d.moniker
                .as_view()
                .segments()
                .any(|s| s.kind == b"struct" && s.name == b"Foo")
        });
        assert!(
            comment_under_struct,
            "comment inside struct body should be re-parented onto the struct"
        );
    }

    #[test]
    fn canonical_walker_collapses_consecutive_line_comments_into_one_def() {
        let src = b"// a\n// b\n// c\npub struct Foo;";
        let g = graph_of(src);

        let comments: Vec<_> = g.defs().filter(|d| d.kind == b"comment").collect();
        assert_eq!(
            comments.len(),
            1,
            "three adjacent line comments collapse to one def"
        );
        let pos = comments[0].position.expect("comment has a position");
        assert_eq!(
            &src[pos.0 as usize..pos.1 as usize],
            b"// a\n// b\n// c".as_slice(),
            "collapsed span covers the whole run"
        );
    }

    #[test]
    fn canonical_walker_splits_comments_separated_by_blank_line() {
        let g = graph_of(b"// a\n// b\n\n// c\npub struct Foo;");

        assert_eq!(comment_count(&g), 2, "a blank line breaks the run");
    }

    #[test]
    fn canonical_walker_splits_comments_separated_by_code() {
        let g = graph_of(b"// a\npub struct Foo;\n// b\npub struct Bar;");

        assert_eq!(
            comment_count(&g),
            2,
            "code between two comments forces two separate defs"
        );
    }

    #[test]
    fn canonical_walker_does_not_drop_comments_in_mod_inline_position() {
        let g = graph_of(b"pub mod inner {\n // inside\n pub struct InnerStruct;\n}");

        assert_eq!(
            comment_count(&g),
            1,
            "default-recurse must reach into mod_item; comment inside must be emitted"
        );
        assert!(
            g.defs().any(|d| d.kind == b"struct"),
            "the inner struct must also be emitted because the walker recursed"
        );
    }
}