Skip to main content

code_moniker_core/lang/python/
mod.rs

1use std::cell::RefCell;
2use std::collections::HashMap;
3
4use tree_sitter::{Language, Parser, Tree};
5
6use crate::core::code_graph::CodeGraph;
7use crate::core::moniker::Moniker;
8
9use crate::lang::canonical_walker::CanonicalWalker;
10
11pub mod build;
12mod canonicalize;
13mod kinds;
14mod strategy;
15
16use canonicalize::compute_module_moniker;
17use strategy::{Strategy, collect_callable_table, collect_type_table};
18
/// Python-specific extraction presets.
///
/// The struct has no fields at present; it exists so the Python extractor
/// fits the shared `LangExtractor::Presets` slot and can be built with
/// `Presets::default()`.
#[derive(Debug, Default, Clone)]
pub struct Presets {}
21
22pub fn parse(source: &str) -> Tree {
23	let mut parser = Parser::new();
24	let language: Language = tree_sitter_python::LANGUAGE.into();
25	parser
26		.set_language(&language)
27		.expect("failed to load tree-sitter Python grammar");
28	parser
29		.parse(source, None)
30		.expect("tree-sitter parse returned None on a non-cancelled call")
31}
32
33pub fn extract(
34	uri: &str,
35	source: &str,
36	anchor: &Moniker,
37	deep: bool,
38	_presets: &Presets,
39) -> CodeGraph {
40	let tree = parse(source);
41	let module = compute_module_moniker(anchor, uri);
42	let (def_cap, ref_cap) = CodeGraph::capacity_for_source(source.len());
43	let mut graph = CodeGraph::with_capacity(module.clone(), kinds::MODULE, def_cap, ref_cap);
44	let mut type_table: HashMap<&[u8], Moniker> = HashMap::new();
45	collect_type_table(
46		tree.root_node(),
47		source.as_bytes(),
48		&module,
49		&mut type_table,
50	);
51	let mut callable_table: HashMap<(Moniker, Vec<u8>), Vec<u8>> = HashMap::new();
52	collect_callable_table(
53		tree.root_node(),
54		source.as_bytes(),
55		&module,
56		false,
57		&mut callable_table,
58	);
59	let strat = Strategy {
60		module: module.clone(),
61		source_bytes: source.as_bytes(),
62		deep,
63		imports: RefCell::new(HashMap::<Vec<u8>, &'static [u8]>::new()),
64		import_targets: RefCell::new(HashMap::<Vec<u8>, _>::new()),
65		local_scope: RefCell::new(Vec::new()),
66		type_table,
67		callable_table,
68	};
69	let walker = CanonicalWalker::new(&strat, source.as_bytes());
70	walker.walk(tree.root_node(), &module, &mut graph);
71	if let Some(docstring) = strategy::first_docstring(tree.root_node()) {
72		strategy::emit_docstring_def(docstring, &module, &mut graph);
73	}
74	graph
75}
76
77pub struct Lang;
78
79impl crate::lang::LangExtractor for Lang {
80	type Presets = Presets;
81	const LANG_TAG: &'static str = "python";
82	const ALLOWED_KINDS: &'static [&'static str] =
83		&["class", "function", "method", "async_function"];
84	const ALLOWED_VISIBILITIES: &'static [&'static str] = &["public", "private", "module"];
85
86	fn extract(
87		uri: &str,
88		source: &str,
89		anchor: &Moniker,
90		deep: bool,
91		presets: &Self::Presets,
92	) -> CodeGraph {
93		extract(uri, source, anchor, deep, presets)
94	}
95}
96
#[cfg(test)]
mod tests {
	use super::*;
	use crate::core::moniker::MonikerBuilder;
	use crate::lang::assert_conformance;

	/// First def in the graph whose `kind` equals `$kind`; panics with `$why` when absent.
	macro_rules! first_def {
		($graph:expr, $kind:expr, $why:expr) => {
			$graph.defs().find(|def| def.kind == $kind).expect($why)
		};
	}

	/// First ref in the graph whose `kind` equals `$kind`; panics with `$why` when absent.
	macro_rules! first_ref {
		($graph:expr, $kind:expr, $why:expr) => {
			$graph.refs().find(|rf| rf.kind == $kind).expect($why)
		};
	}

	/// Name of the last segment of a moniker.
	macro_rules! leaf_name {
		($moniker:expr) => {
			$moniker.as_view().segments().last().unwrap().name
		};
	}

	/// Anchor shared by every test: a moniker holding only the `app` project.
	fn app_anchor() -> Moniker {
		MonikerBuilder::new().project(b"app").build()
	}

	/// Extracts `source` under the shared anchor with default presets and
	/// checks the resulting graph for conformance before handing it back.
	fn run(uri: &str, source: &str, deep: bool) -> CodeGraph {
		let anchor = app_anchor();
		let graph = extract(uri, source, &anchor, deep, &Presets::default());
		assert_conformance::<Lang>(&graph, &anchor);
		graph
	}

	#[test]
	fn parse_empty_returns_module() {
		let tree = parse("");
		let root_kind = tree.root_node().kind();
		assert_eq!(root_kind, "module");
	}

	#[test]
	fn extract_module_uses_path_segments() {
		let graph = run("acme/util/text.py", "", false);
		let want = MonikerBuilder::new()
			.project(b"app")
			.segment(b"lang", b"python")
			.segment(b"package", b"acme")
			.segment(b"package", b"util")
			.segment(b"module", b"text")
			.build();
		assert_eq!(graph.root(), &want);
	}

	#[test]
	fn extract_module_root_is_filename_only() {
		let graph = run("foo.py", "", false);
		let want = MonikerBuilder::new()
			.project(b"app")
			.segment(b"lang", b"python")
			.segment(b"module", b"foo")
			.build();
		assert_eq!(graph.root(), &want);
	}

	#[test]
	fn extract_function_with_typed_params_emits_full_signature() {
		let graph = run(
			"m.py",
			"def make(x: int, y: str) -> int:\n    return x\n",
			false,
		);
		let func = first_def!(graph, b"function", "function def");
		let tail = func.moniker.as_view().segments().last().unwrap();
		assert_eq!(tail.kind, b"function");
		assert_eq!(tail.name, b"make(x:int,y:str)");
		assert_eq!(func.signature, b"x:int,y:str".to_vec());
	}

	#[test]
	fn extract_function_with_untyped_params_uses_name_only_slots() {
		let graph = run("m.py", "def f(a, b=1):\n    return a\n", false);
		let func = first_def!(graph, b"function", "function def");
		let tail = func.moniker.as_view().segments().last().unwrap();
		assert_eq!(tail.name, b"f(a,b)");
		assert_eq!(func.signature, b"a,b".to_vec());
	}

	#[test]
	fn extract_classmethod_excludes_cls_from_signature() {
		let graph = run(
			"foo.py",
			"class Foo:\n    @classmethod\n    def make(cls, x: int) -> 'Foo':\n        return cls()\n",
			false,
		);
		let method = first_def!(graph, b"method", "method def");
		assert_eq!(leaf_name!(method.moniker), b"make(x:int)");
	}

	#[test]
	fn extract_double_underscore_visibility_is_private() {
		let graph = run(
			"foo.py",
			"class Foo:\n    def __secret(self):\n        pass\n",
			false,
		);
		let method = first_def!(graph, b"method", "method def");
		assert_eq!(method.visibility, b"private".to_vec());
	}

	#[test]
	fn extract_single_underscore_visibility_is_module() {
		let graph = run("m.py", "def _internal():\n    pass\n", false);
		let func = first_def!(graph, b"function", "function def");
		assert_eq!(func.visibility, b"module".to_vec());
	}

	#[test]
	fn extract_import_module_emits_imports_module() {
		let graph = run("m.py", "import os\nimport acme.util as u\n", false);
		let module_imports = graph
			.refs()
			.filter(|rf| rf.kind == b"imports_module")
			.count();
		assert_eq!(module_imports, 2);
	}

	#[test]
	fn extract_stdlib_import_marks_external() {
		let graph = run("m.py", "import json\n", false);
		let import = first_ref!(graph, b"imports_module", "imports_module");
		assert_eq!(import.confidence, b"external".to_vec());
	}

	#[test]
	fn extract_project_import_marks_imported() {
		let graph = run("m.py", "import acme.util\n", false);
		let import = first_ref!(graph, b"imports_module", "imports_module");
		assert_eq!(import.confidence, b"imported".to_vec());
	}

	#[test]
	fn extract_from_import_emits_one_imports_symbol_per_name() {
		let graph = run("m.py", "from acme.util import a, b as c\n", false);

		// One `imports_symbol` ref per imported name, keyed by the original name.
		let imported: Vec<&[u8]> = graph
			.refs()
			.filter(|rf| rf.kind == b"imports_symbol")
			.map(|rf| leaf_name!(rf.target))
			.collect();
		assert_eq!(imported, [&b"a"[..], &b"b"[..]]);

		// Shape of the first symbol's target moniker.
		let target_kinds: Vec<&[u8]> = graph
			.refs()
			.find(|rf| rf.kind == b"imports_symbol")
			.unwrap()
			.target
			.as_view()
			.segments()
			.map(|seg| seg.kind)
			.collect();
		assert_eq!(
			target_kinds,
			[&b"lang"[..], &b"package"[..], &b"module"[..], &b"path"[..]]
		);

		// The `as c` alias is carried on the ref itself.
		let aliased = graph
			.refs()
			.find(|rf| rf.kind == b"imports_symbol" && rf.alias == b"c")
			.expect("aliased import");
		assert_eq!(aliased.alias, b"c".to_vec());
	}

	#[test]
	fn extract_relative_import_resolves_against_importer() {
		let graph = run("acme/m.py", "from .util import helper\n", false);
		let import = first_ref!(graph, b"imports_symbol", "imports_symbol");
		let (seg_kinds, seg_names): (Vec<&[u8]>, Vec<&[u8]>) = import
			.target
			.as_view()
			.segments()
			.map(|seg| (seg.kind, seg.name))
			.unzip();
		assert_eq!(
			seg_kinds,
			[&b"lang"[..], &b"package"[..], &b"module"[..], &b"path"[..]]
		);
		assert_eq!(
			seg_names,
			[&b"python"[..], &b"acme"[..], &b"util"[..], &b"helper"[..]]
		);
	}

	#[test]
	fn extract_relative_import_underflow_falls_back_to_external_pkg() {
		let graph = run("m.py", "from ...foo import bar\n", false);
		let import = first_ref!(graph, b"imports_symbol", "imports_symbol");
		let segments: Vec<_> = import.target.as_view().segments().collect();
		let head = &segments[0];
		assert_eq!(head.kind, b"external_pkg");
		assert_eq!(head.name, b"...");
	}

	#[test]
	fn extract_decorator_emits_annotates() {
		let graph = run(
			"m.py",
			"import functools\n@functools.wraps(fn)\ndef g():\n    pass\n",
			false,
		);
		let annotation = first_ref!(graph, b"annotates", "annotates ref");
		assert_eq!(leaf_name!(annotation.target), b"wraps");
	}

	#[test]
	fn extract_param_read_marks_confidence_local() {
		let graph = run("m.py", "def f(x):\n    return x\n", true);
		let read = graph
			.refs()
			.find(|rf| rf.kind == b"reads" && leaf_name!(rf.target) == b"x")
			.expect("reads x");
		assert_eq!(read.confidence, b"local".to_vec());
	}

	#[test]
	fn extract_deep_emits_param_def() {
		let graph = run("m.py", "def f(x, y: int):\n    return x\n", true);
		let param_names: Vec<&[u8]> = graph
			.defs()
			.filter(|def| def.kind == b"param")
			.map(|def| leaf_name!(def.moniker))
			.collect();
		for wanted in [&b"x"[..], &b"y"[..]] {
			assert!(param_names.contains(&wanted));
		}
	}

	#[test]
	fn extract_function_docstring_emits_comment_def_parented_on_function() {
		let graph = run(
			"m.py",
			"def f():\n    \"\"\"docstring\"\"\"\n    return 0\n",
			false,
		);
		let owner = MonikerBuilder::new()
			.project(b"app")
			.segment(b"lang", b"python")
			.segment(b"module", b"m")
			.segment(b"function", b"f()")
			.build();
		// A comment def counts only when its parent index resolves to the function def.
		let docstring_count = graph
			.defs()
			.filter(|def| {
				def.kind == b"comment"
					&& def
						.parent
						.and_then(|idx| graph.defs().nth(idx))
						.is_some_and(|parent| parent.moniker == owner)
			})
			.count();
		assert_eq!(
			docstring_count,
			1,
			"function docstring must emit one comment def parented on the function. defs: {:?}",
			graph.def_monikers()
		);
	}

	#[test]
	fn extract_class_and_module_docstring_each_emit_one_comment() {
		let graph = run(
			"m.py",
			"\"\"\"module doc\"\"\"\nclass A:\n    \"\"\"class doc\"\"\"\n    pass\n",
			false,
		);
		let comment_count = graph.defs().filter(|def| def.kind == b"comment").count();
		assert_eq!(
			comment_count,
			2,
			"module-level and class docstrings should each yield one comment def. defs: {:?}",
			graph.def_monikers()
		);
	}

	#[test]
	fn extract_non_docstring_string_at_start_is_not_a_comment() {
		let graph = run("m.py", "x = \"hello\"\n", false);
		let comment_count = graph.defs().filter(|def| def.kind == b"comment").count();
		assert_eq!(
			comment_count,
			0,
			"string literals that aren't bare expression-statement-strings must NOT be treated as docstrings"
		);
	}
}
377			0,
378			"string literals that aren't bare expression-statement-strings must NOT be treated as docstrings"
379		);
380	}
381}