Skip to main content

code_moniker_core/lang/python/
mod.rs

1use std::cell::RefCell;
2use std::collections::HashMap;
3
4use tree_sitter::{Language, Parser, Tree};
5
6use crate::core::code_graph::CodeGraph;
7use crate::core::moniker::Moniker;
8
9use crate::lang::canonical_walker::CanonicalWalker;
10
11pub mod build;
12mod canonicalize;
13mod kinds;
14mod strategy;
15
16use canonicalize::compute_module_moniker;
17use strategy::{Strategy, collect_callable_table, collect_type_table};
18
/// Extraction presets for the Python extractor.
///
/// The struct currently has no fields; it is the type bound to
/// `LangExtractor::Presets` for `Lang` in this module.
#[derive(Debug, Default, Clone)]
pub struct Presets {}
21
22pub fn parse(source: &str) -> Tree {
23	let mut parser = Parser::new();
24	let language: Language = tree_sitter_python::LANGUAGE.into();
25	parser
26		.set_language(&language)
27		.expect("failed to load tree-sitter Python grammar");
28	parser
29		.parse(source, None)
30		.expect("tree-sitter parse returned None on a non-cancelled call")
31}
32
33pub fn extract(
34	uri: &str,
35	source: &str,
36	anchor: &Moniker,
37	deep: bool,
38	_presets: &Presets,
39) -> CodeGraph {
40	let tree = parse(source);
41	let module = compute_module_moniker(anchor, uri);
42	let (def_cap, ref_cap) = CodeGraph::capacity_for_source(source.len());
43	let mut graph = CodeGraph::with_capacity(module.clone(), kinds::MODULE, def_cap, ref_cap);
44	let mut type_table: HashMap<&[u8], Moniker> = HashMap::new();
45	collect_type_table(
46		tree.root_node(),
47		source.as_bytes(),
48		&module,
49		&mut type_table,
50	);
51	let mut callable_table: HashMap<(Moniker, Vec<u8>), Vec<u8>> = HashMap::new();
52	collect_callable_table(
53		tree.root_node(),
54		source.as_bytes(),
55		&module,
56		false,
57		&mut callable_table,
58	);
59	let strat = Strategy {
60		module: module.clone(),
61		source_bytes: source.as_bytes(),
62		deep,
63		imports: RefCell::new(HashMap::<Vec<u8>, &'static [u8]>::new()),
64		local_scope: RefCell::new(Vec::new()),
65		type_table,
66		callable_table,
67	};
68	let walker = CanonicalWalker::new(&strat, source.as_bytes());
69	walker.walk(tree.root_node(), &module, &mut graph);
70	if let Some(docstring) = strategy::first_docstring(tree.root_node()) {
71		strategy::emit_docstring_def(docstring, &module, &mut graph);
72	}
73	graph
74}
75
76pub struct Lang;
77
78impl crate::lang::LangExtractor for Lang {
79	type Presets = Presets;
80	const LANG_TAG: &'static str = "python";
81	const ALLOWED_KINDS: &'static [&'static str] =
82		&["class", "function", "method", "async_function"];
83	const ALLOWED_VISIBILITIES: &'static [&'static str] = &["public", "private", "module"];
84
85	fn extract(
86		uri: &str,
87		source: &str,
88		anchor: &Moniker,
89		deep: bool,
90		presets: &Self::Presets,
91	) -> CodeGraph {
92		extract(uri, source, anchor, deep, presets)
93	}
94}
95
#[cfg(test)]
mod tests {
	use super::*;
	use crate::core::moniker::MonikerBuilder;
	use crate::lang::assert_conformance;

	/// Anchor moniker shared by the tests: a bare project named `app`.
	fn make_anchor() -> Moniker {
		MonikerBuilder::new().project(b"app").build()
	}

	/// Runs `extract` with default presets, checks the graph with
	/// `assert_conformance`, and returns it.
	fn extract_default(uri: &str, source: &str, anchor: &Moniker, deep: bool) -> CodeGraph {
		let presets = Presets::default();
		let graph = extract(uri, source, anchor, deep, &presets);
		assert_conformance::<Lang>(&graph, anchor);
		graph
	}

	/// Name of the last segment of `moniker`; panics if it has no segments.
	fn leaf_name(moniker: &Moniker) -> &[u8] {
		moniker.as_view().segments().last().unwrap().name
	}

	/// Parsing the empty string still yields a `module` root node.
	#[test]
	fn parse_empty_returns_module() {
		let tree = parse("");
		let root = tree.root_node();
		assert_eq!(root.kind(), "module");
	}

	/// Two `#` comments produce two `comment` defs.
	#[test]
	fn extract_emits_comment_def_per_comment_node() {
		let anchor = make_anchor();
		let source = "# a\n# b\nclass Foo: pass\n";
		let graph = extract_default("acme/foo.py", source, &anchor, false);
		let comment_defs = graph.defs().filter(|def| def.kind == b"comment").count();
		assert_eq!(comment_defs, 2);
	}

	/// Directory components of the URI become `package` segments and the file
	/// stem becomes the `module` segment.
	#[test]
	fn extract_module_uses_path_segments() {
		let anchor = make_anchor();
		let graph = extract_default("acme/util/text.py", "", &anchor, false);
		let want = MonikerBuilder::new()
			.project(b"app")
			.segment(b"lang", b"python")
			.segment(b"package", b"acme")
			.segment(b"package", b"util")
			.segment(b"module", b"text")
			.build();
		assert_eq!(graph.root(), &want);
	}

	/// A URI without directories yields only `lang` + `module` segments.
	#[test]
	fn extract_module_root_is_filename_only() {
		let anchor = make_anchor();
		let graph = extract_default("foo.py", "", &anchor, false);
		let want = MonikerBuilder::new()
			.project(b"app")
			.segment(b"lang", b"python")
			.segment(b"module", b"foo")
			.build();
		assert_eq!(graph.root(), &want);
	}

	/// A plainly named class is `public`.
	#[test]
	fn extract_class_emits_class_def_with_public_visibility_default() {
		let anchor = make_anchor();
		let graph = extract_default("foo.py", "class Foo:\n    pass\n", &anchor, false);
		let class_def = graph
			.defs()
			.find(|def| def.kind == b"class")
			.expect("class def");
		assert_eq!(class_def.visibility, b"public".to_vec());
	}

	/// Typed parameters appear as `name:type` slots in both the moniker name
	/// and the signature.
	#[test]
	fn extract_function_with_typed_params_emits_full_signature() {
		let anchor = make_anchor();
		let source = "def make(x: int, y: str) -> int:\n    return x\n";
		let graph = extract_default("m.py", source, &anchor, false);
		let func = graph
			.defs()
			.find(|def| def.kind == b"function")
			.expect("function def");
		let tail = func.moniker.as_view().segments().last().unwrap();
		assert_eq!(tail.kind, b"function");
		assert_eq!(tail.name, b"make(x:int,y:str)");
		assert_eq!(func.signature, b"x:int,y:str".to_vec());
	}

	/// Untyped parameters (including defaulted ones) contribute their bare name.
	#[test]
	fn extract_function_with_untyped_params_uses_name_only_slots() {
		let anchor = make_anchor();
		let source = "def f(a, b=1):\n    return a\n";
		let graph = extract_default("m.py", source, &anchor, false);
		let func = graph
			.defs()
			.find(|def| def.kind == b"function")
			.expect("function def");
		let tail = func.moniker.as_view().segments().last().unwrap();
		assert_eq!(tail.name, b"f(a,b)");
		assert_eq!(func.signature, b"a,b".to_vec());
	}

	/// `self` is not part of a method's signature.
	#[test]
	fn extract_method_excludes_self_from_signature() {
		let anchor = make_anchor();
		let source = "class Foo:\n    def bar(self, x: int) -> int:\n        return x\n";
		let graph = extract_default("foo.py", source, &anchor, false);
		let method = graph
			.defs()
			.find(|def| def.kind == b"method")
			.expect("method def");
		let tail = method.moniker.as_view().segments().last().unwrap();
		assert_eq!(tail.kind, b"method");
		assert_eq!(tail.name, b"bar(x:int)");
		assert_eq!(method.signature, b"x:int".to_vec());
	}

	/// `cls` is not part of a classmethod's signature.
	#[test]
	fn extract_classmethod_excludes_cls_from_signature() {
		let anchor = make_anchor();
		let source = "class Foo:\n    @classmethod\n    def make(cls, x: int) -> 'Foo':\n        return cls()\n";
		let graph = extract_default("foo.py", source, &anchor, false);
		let method = graph
			.defs()
			.find(|def| def.kind == b"method")
			.expect("method def");
		assert_eq!(leaf_name(&method.moniker), b"make(x:int)");
	}

	/// Dunder names such as `__init__` are `public`.
	#[test]
	fn extract_dunder_visibility_is_public() {
		let anchor = make_anchor();
		let source = "class Foo:\n    def __init__(self):\n        pass\n";
		let graph = extract_default("foo.py", source, &anchor, false);
		let method = graph
			.defs()
			.find(|def| def.kind == b"method")
			.expect("__init__");
		assert_eq!(method.visibility, b"public".to_vec());
	}

	/// A leading double underscore (without trailing one) is `private`.
	#[test]
	fn extract_double_underscore_visibility_is_private() {
		let anchor = make_anchor();
		let source = "class Foo:\n    def __secret(self):\n        pass\n";
		let graph = extract_default("foo.py", source, &anchor, false);
		let method = graph
			.defs()
			.find(|def| def.kind == b"method")
			.expect("method def");
		assert_eq!(method.visibility, b"private".to_vec());
	}

	/// A single leading underscore is `module` visibility.
	#[test]
	fn extract_single_underscore_visibility_is_module() {
		let anchor = make_anchor();
		let source = "def _internal():\n    pass\n";
		let graph = extract_default("m.py", source, &anchor, false);
		let func = graph
			.defs()
			.find(|def| def.kind == b"function")
			.expect("function def");
		assert_eq!(func.visibility, b"module".to_vec());
	}

	/// Each `import x` statement (aliased or not) yields one `imports_module` ref.
	#[test]
	fn extract_import_module_emits_imports_module() {
		let anchor = make_anchor();
		let source = "import os\nimport acme.util as u\n";
		let graph = extract_default("m.py", source, &anchor, false);
		let module_imports = graph
			.refs()
			.filter(|r| r.kind == b"imports_module")
			.count();
		assert_eq!(module_imports, 2);
	}

	/// Importing `json` is tagged with `external` confidence.
	#[test]
	fn extract_stdlib_import_marks_external() {
		let anchor = make_anchor();
		let graph = extract_default("m.py", "import json\n", &anchor, false);
		let import_ref = graph
			.refs()
			.find(|r| r.kind == b"imports_module")
			.expect("imports_module");
		assert_eq!(import_ref.confidence, b"external".to_vec());
	}

	/// Importing `acme.util` is tagged with `imported` confidence.
	#[test]
	fn extract_project_import_marks_imported() {
		let anchor = make_anchor();
		let graph = extract_default("m.py", "import acme.util\n", &anchor, false);
		let import_ref = graph
			.refs()
			.find(|r| r.kind == b"imports_module")
			.expect("imports_module");
		assert_eq!(import_ref.confidence, b"imported".to_vec());
	}

	/// `from m import a, b as c` yields one `imports_symbol` per imported name,
	/// targets shaped lang/package/module/path, and the alias recorded on a ref.
	#[test]
	fn extract_from_import_emits_one_imports_symbol_per_name() {
		let anchor = make_anchor();
		let source = "from acme.util import a, b as c\n";
		let graph = extract_default("m.py", source, &anchor, false);

		let imported: Vec<&[u8]> = graph
			.refs()
			.filter(|r| r.kind == b"imports_symbol")
			.map(|r| leaf_name(&r.target))
			.collect();
		assert_eq!(imported, vec![&b"a"[..], &b"b"[..]]);

		let first_symbol = graph
			.refs()
			.find(|r| r.kind == b"imports_symbol")
			.unwrap();
		let segment_kinds: Vec<&[u8]> = first_symbol
			.target
			.as_view()
			.segments()
			.map(|seg| seg.kind)
			.collect();
		assert_eq!(
			segment_kinds,
			vec![&b"lang"[..], &b"package"[..], &b"module"[..], &b"path"[..]]
		);

		let with_alias = graph
			.refs()
			.find(|r| r.kind == b"imports_symbol" && r.alias == b"c")
			.expect("aliased import");
		assert_eq!(with_alias.alias, b"c".to_vec());
	}

	/// `from .util import helper` inside `acme/m.py` targets `acme.util.helper`.
	#[test]
	fn extract_relative_import_resolves_against_importer() {
		let anchor = make_anchor();
		let source = "from .util import helper\n";
		let graph = extract_default("acme/m.py", source, &anchor, false);
		let symbol = graph
			.refs()
			.find(|r| r.kind == b"imports_symbol")
			.expect("imports_symbol");
		let segments: Vec<_> = symbol.target.as_view().segments().collect();
		let segment_kinds: Vec<&[u8]> = segments.iter().map(|seg| seg.kind).collect();
		let segment_names: Vec<&[u8]> = segments.iter().map(|seg| seg.name).collect();
		assert_eq!(
			segment_kinds,
			vec![&b"lang"[..], &b"package"[..], &b"module"[..], &b"path"[..]]
		);
		assert_eq!(
			segment_names,
			vec![&b"python"[..], &b"acme"[..], &b"util"[..], &b"helper"[..]]
		);
	}

	/// More leading dots than the importer has parents falls back to an
	/// `external_pkg` head segment named with the dots themselves.
	#[test]
	fn extract_relative_import_underflow_falls_back_to_external_pkg() {
		let anchor = make_anchor();
		let source = "from ...foo import bar\n";
		let graph = extract_default("m.py", source, &anchor, false);
		let symbol = graph
			.refs()
			.find(|r| r.kind == b"imports_symbol")
			.expect("imports_symbol");
		let segments: Vec<_> = symbol.target.as_view().segments().collect();
		let head = &segments[0];
		assert_eq!(head.kind, b"external_pkg");
		assert_eq!(head.name, b"...");
	}

	/// A decorator produces an `annotates` ref whose target ends in the
	/// decorator's attribute name.
	#[test]
	fn extract_decorator_emits_annotates() {
		let anchor = make_anchor();
		let source = "import functools\n@functools.wraps(fn)\ndef g():\n    pass\n";
		let graph = extract_default("m.py", source, &anchor, false);
		let annotation = graph
			.refs()
			.find(|r| r.kind == b"annotates")
			.expect("annotates ref");
		assert_eq!(leaf_name(&annotation.target), b"wraps");
	}

	/// A base class declared in the same file yields a `resolved` `extends` ref
	/// ending in a `class` segment.
	#[test]
	fn extract_base_class_emits_extends() {
		let anchor = make_anchor();
		let source = "class A:\n    pass\nclass B(A):\n    pass\n";
		let graph = extract_default("m.py", source, &anchor, false);
		let extends_ref = graph
			.refs()
			.find(|r| r.kind == b"extends")
			.expect("extends ref");
		assert_eq!(extends_ref.confidence, b"resolved".to_vec());
		let tail = extends_ref.target.as_view().segments().last().unwrap();
		assert_eq!(tail.kind, b"class");
		assert_eq!(tail.name, b"A");
	}

	/// `self.bar()` records `self` as the receiver hint.
	#[test]
	fn extract_method_call_carries_receiver_hint_self() {
		let anchor = make_anchor();
		let source =
			"class Foo:\n    def m(self):\n        self.bar()\n    def bar(self):\n        pass\n";
		let graph = extract_default("foo.py", source, &anchor, false);
		let call = graph
			.refs()
			.find(|r| r.kind == b"method_call")
			.expect("method_call ref");
		assert_eq!(call.receiver_hint, b"self".to_vec());
	}

	/// `obj.bar()` records the receiver identifier text as the hint.
	#[test]
	fn extract_method_call_receiver_hint_carries_identifier_text() {
		let anchor = make_anchor();
		let source = "def f():\n    obj.bar()\n";
		let graph = extract_default("m.py", source, &anchor, false);
		let call = graph
			.refs()
			.find(|r| r.kind == b"method_call")
			.expect("method_call ref");
		assert_eq!(
			call.receiver_hint,
			b"obj".to_vec(),
			"receiver hint must carry the identifier text for non-self/cls receivers",
		);
	}

	/// Calling a name brought in by `from … import` is tagged `imported`.
	#[test]
	fn extract_call_with_imported_name_marks_imported_confidence() {
		let anchor = make_anchor();
		let source = "from acme import helper\ndef f():\n    helper()\n";
		let graph = extract_default("m.py", source, &anchor, false);
		let call = graph
			.refs()
			.find(|r| r.kind == b"calls" && leaf_name(&r.target) == b"helper")
			.expect("calls helper (name-only — imported callee, signature unknown)");
		assert_eq!(call.confidence, b"imported".to_vec());
	}

	/// With `deep` on, reading a parameter is a `reads` ref with `local` confidence.
	#[test]
	fn extract_param_read_marks_confidence_local() {
		let anchor = make_anchor();
		let source = "def f(x):\n    return x\n";
		let graph = extract_default("m.py", source, &anchor, true);
		let read = graph
			.refs()
			.find(|r| r.kind == b"reads" && leaf_name(&r.target) == b"x")
			.expect("reads x");
		assert_eq!(read.confidence, b"local".to_vec());
	}

	/// With `deep` on, each parameter gets its own `param` def.
	#[test]
	fn extract_deep_emits_param_def() {
		let anchor = make_anchor();
		let source = "def f(x, y: int):\n    return x\n";
		let graph = extract_default("m.py", source, &anchor, true);
		let param_names: Vec<&[u8]> = graph
			.defs()
			.filter(|def| def.kind == b"param")
			.map(|def| leaf_name(&def.moniker))
			.collect();
		assert!(param_names.iter().any(|name| *name == b"x"));
		assert!(param_names.iter().any(|name| *name == b"y"));
	}

	/// A parameter annotation yields a `uses_type` ref to the annotated type.
	#[test]
	fn extract_typed_param_emits_uses_type() {
		let anchor = make_anchor();
		let source = "def f(x: int):\n    return x\n";
		let graph = extract_default("m.py", source, &anchor, false);
		let type_use = graph
			.refs()
			.find(|r| r.kind == b"uses_type" && leaf_name(&r.target) == b"int")
			.expect("uses_type int");
		let confidence = type_use.confidence.as_slice();
		assert!(matches!(confidence, b"name_match" | b"resolved"));
	}

	/// A function docstring becomes exactly one `comment` def whose parent is
	/// the function's def.
	#[test]
	fn extract_function_docstring_emits_comment_def_parented_on_function() {
		let anchor = make_anchor();
		let source = "def f():\n    \"\"\"docstring\"\"\"\n    return 0\n";
		let graph = extract_default("m.py", source, &anchor, false);
		let fn_moniker = MonikerBuilder::new()
			.project(b"app")
			.segment(b"lang", b"python")
			.segment(b"module", b"m")
			.segment(b"function", b"f()")
			.build();
		let docstring_count = graph
			.defs()
			.filter(|def| {
				def.kind == b"comment"
					&& def
						.parent
						.and_then(|idx| graph.defs().nth(idx))
						.is_some_and(|parent| parent.moniker == fn_moniker)
			})
			.count();
		assert_eq!(
			docstring_count,
			1,
			"function docstring must emit one comment def parented on the function. defs: {:?}",
			graph.def_monikers()
		);
	}

	/// A module docstring and a class docstring together give two `comment` defs.
	#[test]
	fn extract_class_and_module_docstring_each_emit_one_comment() {
		let anchor = make_anchor();
		let source = "\"\"\"module doc\"\"\"\nclass A:\n    \"\"\"class doc\"\"\"\n    pass\n";
		let graph = extract_default("m.py", source, &anchor, false);
		let comment_defs = graph.defs().filter(|def| def.kind == b"comment").count();
		assert_eq!(
			comment_defs,
			2,
			"module-level and class docstrings should each yield one comment def. defs: {:?}",
			graph.def_monikers()
		);
	}

	/// A string on the right-hand side of an assignment is not a docstring.
	#[test]
	fn extract_non_docstring_string_at_start_is_not_a_comment() {
		let anchor = make_anchor();
		let source = "x = \"hello\"\n";
		let graph = extract_default("m.py", source, &anchor, false);
		let comment_defs = graph.defs().filter(|def| def.kind == b"comment").count();
		assert_eq!(
			comment_defs,
			0,
			"string literals that aren't bare expression-statement-strings must NOT be treated as docstrings"
		);
	}

	/// `List[int]` yields `uses_type` refs for both the container and its argument.
	#[test]
	fn extract_subscript_type_descends_into_arguments() {
		let anchor = make_anchor();
		let source = "from typing import List\ndef f(xs: List[int]) -> List[int]:\n    return xs\n";
		let graph = extract_default("m.py", source, &anchor, false);
		let names: Vec<&[u8]> = graph
			.refs()
			.filter(|r| r.kind == b"uses_type")
			.map(|r| leaf_name(&r.target))
			.collect();
		assert!(names.iter().any(|name| *name == b"List"));
		assert!(names.iter().any(|name| *name == b"int"));
	}
}