1use std::cell::RefCell;
2use std::collections::HashMap;
3
4use tree_sitter::{Language, Parser, Tree};
5
6use crate::core::code_graph::CodeGraph;
7use crate::core::moniker::Moniker;
8
9use crate::lang::canonical_walker::CanonicalWalker;
10
11pub mod build;
12mod canonicalize;
13mod kinds;
14mod strategy;
15
16use canonicalize::compute_module_moniker;
17use strategy::{Strategy, collect_callable_table, collect_type_table};
18
/// Extraction presets for the Python front end.
///
/// The struct has no fields at present; `extract` accepts a reference to it
/// but does not read anything from it.
#[derive(Debug, Default, Clone)]
pub struct Presets {}
21
22pub fn parse(source: &str) -> Tree {
23 let mut parser = Parser::new();
24 let language: Language = tree_sitter_python::LANGUAGE.into();
25 parser
26 .set_language(&language)
27 .expect("failed to load tree-sitter Python grammar");
28 parser
29 .parse(source, None)
30 .expect("tree-sitter parse returned None on a non-cancelled call")
31}
32
33pub fn extract(
34 uri: &str,
35 source: &str,
36 anchor: &Moniker,
37 deep: bool,
38 _presets: &Presets,
39) -> CodeGraph {
40 let tree = parse(source);
41 let module = compute_module_moniker(anchor, uri);
42 let (def_cap, ref_cap) = CodeGraph::capacity_for_source(source.len());
43 let mut graph = CodeGraph::with_capacity(module.clone(), kinds::MODULE, def_cap, ref_cap);
44 let mut type_table: HashMap<&[u8], Moniker> = HashMap::new();
45 collect_type_table(
46 tree.root_node(),
47 source.as_bytes(),
48 &module,
49 &mut type_table,
50 );
51 let mut callable_table: HashMap<(Moniker, Vec<u8>), Vec<u8>> = HashMap::new();
52 collect_callable_table(
53 tree.root_node(),
54 source.as_bytes(),
55 &module,
56 false,
57 &mut callable_table,
58 );
59 let strat = Strategy {
60 module: module.clone(),
61 source_bytes: source.as_bytes(),
62 deep,
63 imports: RefCell::new(HashMap::<Vec<u8>, &'static [u8]>::new()),
64 local_scope: RefCell::new(Vec::new()),
65 type_table,
66 callable_table,
67 };
68 let walker = CanonicalWalker::new(&strat, source.as_bytes());
69 walker.walk(tree.root_node(), &module, &mut graph);
70 if let Some(docstring) = strategy::first_docstring(tree.root_node()) {
71 strategy::emit_docstring_def(docstring, &module, &mut graph);
72 }
73 graph
74}
75
76pub struct Lang;
77
78impl crate::lang::LangExtractor for Lang {
79 type Presets = Presets;
80 const LANG_TAG: &'static str = "python";
81 const ALLOWED_KINDS: &'static [&'static str] =
82 &["class", "function", "method", "async_function"];
83 const ALLOWED_VISIBILITIES: &'static [&'static str] = &["public", "private", "module"];
84
85 fn extract(
86 uri: &str,
87 source: &str,
88 anchor: &Moniker,
89 deep: bool,
90 presets: &Self::Presets,
91 ) -> CodeGraph {
92 extract(uri, source, anchor, deep, presets)
93 }
94}
95
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::moniker::MonikerBuilder;
    use crate::lang::assert_conformance;

    /// Anchor moniker shared by the tests: a bare project named `app`.
    fn make_anchor() -> Moniker {
        MonikerBuilder::new().project(b"app").build()
    }

    /// Runs [`extract`] with default presets and passes the graph through
    /// `assert_conformance` before returning it.
    fn extract_default(uri: &str, source: &str, anchor: &Moniker, deep: bool) -> CodeGraph {
        let g = extract(uri, source, anchor, deep, &Presets::default());
        assert_conformance::<super::Lang>(&g, anchor);
        g
    }

    /// Empty input still parses to a `module` root node.
    #[test]
    fn parse_empty_returns_module() {
        let tree = parse("");
        assert_eq!(tree.root_node().kind(), "module");
    }

    /// Two `#` comments produce two `comment` defs.
    #[test]
    fn extract_emits_comment_def_per_comment_node() {
        let src = "# a\n# b\nclass Foo: pass\n";
        let g = extract_default("acme/foo.py", src, &make_anchor(), false);
        let n = g.defs().filter(|d| d.kind == b"comment").count();
        assert_eq!(n, 2);
    }

    /// Directories in the URI become `package` segments; the file stem becomes
    /// the `module` segment.
    #[test]
    fn extract_module_uses_path_segments() {
        let g = extract_default("acme/util/text.py", "", &make_anchor(), false);
        let expected = MonikerBuilder::new()
            .project(b"app")
            .segment(b"lang", b"python")
            .segment(b"package", b"acme")
            .segment(b"package", b"util")
            .segment(b"module", b"text")
            .build();
        assert_eq!(g.root(), &expected);
    }

    /// A URI with no directory part yields only a `module` segment.
    #[test]
    fn extract_module_root_is_filename_only() {
        let g = extract_default("foo.py", "", &make_anchor(), false);
        let expected = MonikerBuilder::new()
            .project(b"app")
            .segment(b"lang", b"python")
            .segment(b"module", b"foo")
            .build();
        assert_eq!(g.root(), &expected);
    }

    /// A class without a leading underscore is `public`.
    #[test]
    fn extract_class_emits_class_def_with_public_visibility_default() {
        let g = extract_default("foo.py", "class Foo:\n pass\n", &make_anchor(), false);
        let foo = g.defs().find(|d| d.kind == b"class").expect("class def");
        assert_eq!(foo.visibility, b"public".to_vec());
    }

    /// Typed parameters appear as `name:type` slots in both the moniker name
    /// and the signature.
    #[test]
    fn extract_function_with_typed_params_emits_full_signature() {
        let src = "def make(x: int, y: str) -> int:\n return x\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let f = g
            .defs()
            .find(|d| d.kind == b"function")
            .expect("function def");
        let last = f.moniker.as_view().segments().last().unwrap();
        assert_eq!(last.kind, b"function");
        assert_eq!(last.name, b"make(x:int,y:str)");
        assert_eq!(f.signature, b"x:int,y:str".to_vec());
    }

    /// Untyped parameters (including ones with defaults) contribute their name only.
    #[test]
    fn extract_function_with_untyped_params_uses_name_only_slots() {
        let src = "def f(a, b=1):\n return a\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let f = g
            .defs()
            .find(|d| d.kind == b"function")
            .expect("function def");
        let last = f.moniker.as_view().segments().last().unwrap();
        assert_eq!(last.name, b"f(a,b)");
        assert_eq!(f.signature, b"a,b".to_vec());
    }

    /// `self` is dropped from a method's signature.
    #[test]
    fn extract_method_excludes_self_from_signature() {
        // The method body is indented deeper than its `def` so the fixture is valid Python.
        let src = "class Foo:\n    def bar(self, x: int) -> int:\n        return x\n";
        let g = extract_default("foo.py", src, &make_anchor(), false);
        let m = g.defs().find(|d| d.kind == b"method").expect("method def");
        let last = m.moniker.as_view().segments().last().unwrap();
        assert_eq!(last.kind, b"method");
        assert_eq!(last.name, b"bar(x:int)");
        assert_eq!(m.signature, b"x:int".to_vec());
    }

    /// `cls` is dropped from a classmethod's signature.
    #[test]
    fn extract_classmethod_excludes_cls_from_signature() {
        // The method body is indented deeper than its `def` so the fixture is valid Python.
        let src =
            "class Foo:\n    @classmethod\n    def make(cls, x: int) -> 'Foo':\n        return cls()\n";
        let g = extract_default("foo.py", src, &make_anchor(), false);
        let m = g.defs().find(|d| d.kind == b"method").expect("method def");
        assert_eq!(
            m.moniker.as_view().segments().last().unwrap().name,
            b"make(x:int)"
        );
    }

    /// Dunder names such as `__init__` are `public`.
    #[test]
    fn extract_dunder_visibility_is_public() {
        // The method body is indented deeper than its `def` so the fixture is valid Python.
        let src = "class Foo:\n    def __init__(self):\n        pass\n";
        let g = extract_default("foo.py", src, &make_anchor(), false);
        let m = g.defs().find(|d| d.kind == b"method").expect("__init__");
        assert_eq!(m.visibility, b"public".to_vec());
    }

    /// A leading double underscore without a trailing one is `private`.
    #[test]
    fn extract_double_underscore_visibility_is_private() {
        // The method body is indented deeper than its `def` so the fixture is valid Python.
        let src = "class Foo:\n    def __secret(self):\n        pass\n";
        let g = extract_default("foo.py", src, &make_anchor(), false);
        let m = g.defs().find(|d| d.kind == b"method").expect("method def");
        assert_eq!(m.visibility, b"private".to_vec());
    }

    /// A single leading underscore is `module` visibility.
    #[test]
    fn extract_single_underscore_visibility_is_module() {
        let src = "def _internal():\n pass\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let f = g
            .defs()
            .find(|d| d.kind == b"function")
            .expect("function def");
        assert_eq!(f.visibility, b"module".to_vec());
    }

    /// Each `import` statement yields one `imports_module` ref.
    #[test]
    fn extract_import_module_emits_imports_module() {
        let src = "import os\nimport acme.util as u\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let kinds: Vec<&[u8]> = g.refs().map(|r| r.kind.as_slice()).collect();
        assert_eq!(kinds.iter().filter(|k| **k == b"imports_module").count(), 2);
    }

    /// `import json` is reported with `external` confidence.
    #[test]
    fn extract_stdlib_import_marks_external() {
        let g = extract_default("m.py", "import json\n", &make_anchor(), false);
        let r = g
            .refs()
            .find(|r| r.kind == b"imports_module")
            .expect("imports_module");
        assert_eq!(r.confidence, b"external".to_vec());
    }

    /// `import acme.util` is reported with `imported` confidence.
    #[test]
    fn extract_project_import_marks_imported() {
        let g = extract_default("m.py", "import acme.util\n", &make_anchor(), false);
        let r = g
            .refs()
            .find(|r| r.kind == b"imports_module")
            .expect("imports_module");
        assert_eq!(r.confidence, b"imported".to_vec());
    }

    /// `from … import a, b as c` yields one `imports_symbol` ref per imported
    /// name, targeting the original names, with the alias recorded on `b`.
    #[test]
    fn extract_from_import_emits_one_imports_symbol_per_name() {
        let src = "from acme.util import a, b as c\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let names: Vec<&[u8]> = g
            .refs()
            .filter(|r| r.kind == b"imports_symbol")
            .map(|r| r.target.as_view().segments().last().unwrap().name)
            .collect();
        assert_eq!(names, vec![&b"a"[..], &b"b"[..]]);
        let segs: Vec<_> = g
            .refs()
            .find(|r| r.kind == b"imports_symbol")
            .unwrap()
            .target
            .as_view()
            .segments()
            .collect();
        let kinds: Vec<&[u8]> = segs.iter().map(|s| s.kind).collect();
        assert_eq!(
            kinds,
            vec![&b"lang"[..], &b"package"[..], &b"module"[..], &b"path"[..]]
        );
        let aliased = g
            .refs()
            .find(|r| r.kind == b"imports_symbol" && r.alias == b"c")
            .expect("aliased import");
        // The predicate above already pins the alias, so verify what the alias
        // is attached to: `b as c` must still target `b`.
        assert_eq!(
            aliased.target.as_view().segments().last().unwrap().name,
            b"b"
        );
    }

    /// `from .util import helper` inside `acme/m.py` resolves to `acme.util.helper`.
    #[test]
    fn extract_relative_import_resolves_against_importer() {
        let src = "from .util import helper\n";
        let g = extract_default("acme/m.py", src, &make_anchor(), false);
        let r = g
            .refs()
            .find(|r| r.kind == b"imports_symbol")
            .expect("imports_symbol");
        let segs: Vec<_> = r.target.as_view().segments().collect();
        let kinds: Vec<&[u8]> = segs.iter().map(|s| s.kind).collect();
        let names: Vec<&[u8]> = segs.iter().map(|s| s.name).collect();
        assert_eq!(
            kinds,
            vec![&b"lang"[..], &b"package"[..], &b"module"[..], &b"path"[..]]
        );
        assert_eq!(
            names,
            vec![&b"python"[..], &b"acme"[..], &b"util"[..], &b"helper"[..]]
        );
    }

    /// A relative import with more dots than the importer has packages gets an
    /// `external_pkg` first segment named after the dots.
    #[test]
    fn extract_relative_import_underflow_falls_back_to_external_pkg() {
        let src = "from ...foo import bar\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let r = g
            .refs()
            .find(|r| r.kind == b"imports_symbol")
            .expect("imports_symbol");
        let segs: Vec<_> = r.target.as_view().segments().collect();
        assert_eq!(segs[0].kind, b"external_pkg");
        assert_eq!(segs[0].name, b"...");
    }

    /// A decorator yields an `annotates` ref whose target ends in the decorator name.
    #[test]
    fn extract_decorator_emits_annotates() {
        let src = "import functools\n@functools.wraps(fn)\ndef g():\n pass\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let ann = g
            .refs()
            .find(|r| r.kind == b"annotates")
            .expect("annotates ref");
        assert_eq!(
            ann.target.as_view().segments().last().unwrap().name,
            b"wraps"
        );
    }

    /// A base class defined in the same file yields a `resolved` `extends` ref.
    #[test]
    fn extract_base_class_emits_extends() {
        let src = "class A:\n pass\nclass B(A):\n pass\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let extends_a = g
            .refs()
            .find(|r| r.kind == b"extends")
            .expect("extends ref");
        assert_eq!(extends_a.confidence, b"resolved".to_vec());
        let last = extends_a.target.as_view().segments().last().unwrap();
        assert_eq!(last.kind, b"class");
        assert_eq!(last.name, b"A");
    }

    /// `self.bar()` records `self` as the receiver hint.
    #[test]
    fn extract_method_call_carries_receiver_hint_self() {
        // Method bodies are indented deeper than their `def` so the fixture is valid Python.
        let src = "class Foo:\n    def m(self):\n        self.bar()\n    def bar(self):\n        pass\n";
        let g = extract_default("foo.py", src, &make_anchor(), false);
        let r = g
            .refs()
            .find(|r| r.kind == b"method_call")
            .expect("method_call ref");
        assert_eq!(r.receiver_hint, b"self".to_vec());
    }

    /// `obj.bar()` records the receiver identifier text as the hint.
    #[test]
    fn extract_method_call_receiver_hint_carries_identifier_text() {
        let src = "def f():\n obj.bar()\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let r = g
            .refs()
            .find(|r| r.kind == b"method_call")
            .expect("method_call ref");
        assert_eq!(
            r.receiver_hint,
            b"obj".to_vec(),
            "receiver hint must carry the identifier text for non-self/cls receivers",
        );
    }

    /// Calling a name brought in by `from … import` is marked `imported`.
    #[test]
    fn extract_call_with_imported_name_marks_imported_confidence() {
        let src = "from acme import helper\ndef f():\n helper()\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let r = g
            .refs()
            .find(|r| {
                r.kind == b"calls"
                    && r.target.as_view().segments().last().unwrap().name == b"helper"
            })
            .expect("calls helper (name-only — imported callee, signature unknown)");
        assert_eq!(r.confidence, b"imported".to_vec());
    }

    /// In deep mode, reading a parameter is marked `local`.
    #[test]
    fn extract_param_read_marks_confidence_local() {
        let src = "def f(x):\n return x\n";
        let g = extract_default("m.py", src, &make_anchor(), true);
        let r = g
            .refs()
            .find(|r| {
                r.kind == b"reads" && r.target.as_view().segments().last().unwrap().name == b"x"
            })
            .expect("reads x");
        assert_eq!(r.confidence, b"local".to_vec());
    }

    /// In deep mode, every parameter gets a `param` def.
    #[test]
    fn extract_deep_emits_param_def() {
        let src = "def f(x, y: int):\n return x\n";
        let g = extract_default("m.py", src, &make_anchor(), true);
        let params: Vec<&[u8]> = g
            .defs()
            .filter(|d| d.kind == b"param")
            .map(|d| d.moniker.as_view().segments().last().unwrap().name)
            .collect();
        assert!(params.contains(&&b"x"[..]));
        assert!(params.contains(&&b"y"[..]));
    }

    /// A parameter annotation yields a `uses_type` ref with `name_match` or
    /// `resolved` confidence.
    #[test]
    fn extract_typed_param_emits_uses_type() {
        let src = "def f(x: int):\n return x\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let r = g
            .refs()
            .find(|r| {
                r.kind == b"uses_type"
                    && r.target.as_view().segments().last().unwrap().name == b"int"
            })
            .expect("uses_type int");
        assert!(matches!(
            r.confidence.as_slice(),
            b"name_match" | b"resolved"
        ));
    }

    /// A function docstring becomes exactly one `comment` def whose parent is
    /// the function def.
    #[test]
    fn extract_function_docstring_emits_comment_def_parented_on_function() {
        let src = "def f():\n \"\"\"docstring\"\"\"\n return 0\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let fn_moniker = MonikerBuilder::new()
            .project(b"app")
            .segment(b"lang", b"python")
            .segment(b"module", b"m")
            .segment(b"function", b"f()")
            .build();
        let docstring_count = g
            .defs()
            .filter(|d| d.kind == b"comment")
            .filter(|d| {
                // `parent` is an index into the def sequence.
                d.parent
                    .and_then(|i| g.defs().nth(i))
                    .is_some_and(|p| p.moniker == fn_moniker)
            })
            .count();
        assert_eq!(
            docstring_count,
            1,
            "function docstring must emit one comment def parented on the function. defs: {:?}",
            g.def_monikers()
        );
    }

    /// Module and class docstrings each produce one `comment` def.
    #[test]
    fn extract_class_and_module_docstring_each_emit_one_comment() {
        let src = "\"\"\"module doc\"\"\"\nclass A:\n \"\"\"class doc\"\"\"\n pass\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        assert_eq!(
            g.defs().filter(|d| d.kind == b"comment").count(),
            2,
            "module-level and class docstrings should each yield one comment def. defs: {:?}",
            g.def_monikers()
        );
    }

    /// A string on the right-hand side of an assignment is not a docstring.
    #[test]
    fn extract_non_docstring_string_at_start_is_not_a_comment() {
        let src = "x = \"hello\"\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        assert_eq!(
            g.defs().filter(|d| d.kind == b"comment").count(),
            0,
            "string literals that aren't bare expression-statement-strings must NOT be treated as docstrings"
        );
    }

    /// `List[int]` yields `uses_type` refs for both `List` and `int`.
    #[test]
    fn extract_subscript_type_descends_into_arguments() {
        let src = "from typing import List\ndef f(xs: List[int]) -> List[int]:\n return xs\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let names: Vec<&[u8]> = g
            .refs()
            .filter(|r| r.kind == b"uses_type")
            .map(|r| r.target.as_view().segments().last().unwrap().name)
            .collect();
        assert!(names.contains(&&b"List"[..]));
        assert!(names.contains(&&b"int"[..]));
    }
}