1use std::cell::RefCell;
2use std::collections::HashMap;
3
4use tree_sitter::{Language, Parser, Tree};
5
6use crate::core::code_graph::CodeGraph;
7use crate::core::moniker::Moniker;
8
9use crate::lang::canonical_walker::CanonicalWalker;
10
11pub mod build;
12mod canonicalize;
13mod kinds;
14mod strategy;
15
16use canonicalize::compute_module_moniker;
17use strategy::{Strategy, collect_callable_table, collect_type_table};
18
/// Per-language extraction presets for Python.
///
/// The struct currently carries no fields; `extract` accepts a reference to
/// it but does not read from it.
#[derive(Debug, Default, Clone)]
pub struct Presets {}
21
22pub fn parse(source: &str) -> Tree {
23 let mut parser = Parser::new();
24 let language: Language = tree_sitter_python::LANGUAGE.into();
25 parser
26 .set_language(&language)
27 .expect("failed to load tree-sitter Python grammar");
28 parser
29 .parse(source, None)
30 .expect("tree-sitter parse returned None on a non-cancelled call")
31}
32
33pub fn extract(
34 uri: &str,
35 source: &str,
36 anchor: &Moniker,
37 deep: bool,
38 _presets: &Presets,
39) -> CodeGraph {
40 let tree = parse(source);
41 let module = compute_module_moniker(anchor, uri);
42 let (def_cap, ref_cap) = CodeGraph::capacity_for_source(source.len());
43 let mut graph = CodeGraph::with_capacity(module.clone(), kinds::MODULE, def_cap, ref_cap);
44 let mut type_table: HashMap<&[u8], Moniker> = HashMap::new();
45 collect_type_table(
46 tree.root_node(),
47 source.as_bytes(),
48 &module,
49 &mut type_table,
50 );
51 let mut callable_table: HashMap<(Moniker, Vec<u8>), Vec<u8>> = HashMap::new();
52 collect_callable_table(
53 tree.root_node(),
54 source.as_bytes(),
55 &module,
56 false,
57 &mut callable_table,
58 );
59 let strat = Strategy {
60 module: module.clone(),
61 source_bytes: source.as_bytes(),
62 deep,
63 imports: RefCell::new(HashMap::<Vec<u8>, &'static [u8]>::new()),
64 import_targets: RefCell::new(HashMap::<Vec<u8>, _>::new()),
65 local_scope: RefCell::new(Vec::new()),
66 type_table,
67 callable_table,
68 };
69 let walker = CanonicalWalker::new(&strat, source.as_bytes());
70 walker.walk(tree.root_node(), &module, &mut graph);
71 if let Some(docstring) = strategy::first_docstring(tree.root_node()) {
72 strategy::emit_docstring_def(docstring, &module, &mut graph);
73 }
74 graph
75}
76
77pub struct Lang;
78
79impl crate::lang::LangExtractor for Lang {
80 type Presets = Presets;
81 const LANG_TAG: &'static str = "python";
82 const ALLOWED_KINDS: &'static [&'static str] =
83 &["class", "function", "method", "async_function"];
84 const ALLOWED_VISIBILITIES: &'static [&'static str] = &["public", "private", "module"];
85
86 fn extract(
87 uri: &str,
88 source: &str,
89 anchor: &Moniker,
90 deep: bool,
91 presets: &Self::Presets,
92 ) -> CodeGraph {
93 extract(uri, source, anchor, deep, presets)
94 }
95}
96
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::moniker::MonikerBuilder;
    use crate::lang::assert_conformance;

    /// Anchor moniker shared by the tests: a builder with only the `app`
    /// project set.
    fn make_anchor() -> Moniker {
        MonikerBuilder::new().project(b"app").build()
    }

    /// Runs `extract` with default presets and passes the resulting graph
    /// through `assert_conformance` before returning it.
    fn extract_default(uri: &str, source: &str, anchor: &Moniker, deep: bool) -> CodeGraph {
        let g = extract(uri, source, anchor, deep, &Presets::default());
        assert_conformance::<super::Lang>(&g, anchor);
        g
    }

    #[test]
    fn parse_empty_returns_module() {
        let tree = parse("");
        assert_eq!(tree.root_node().kind(), "module");
    }

    #[test]
    fn extract_module_uses_path_segments() {
        let g = extract_default("acme/util/text.py", "", &make_anchor(), false);
        let expected = MonikerBuilder::new()
            .project(b"app")
            .segment(b"lang", b"python")
            .segment(b"package", b"acme")
            .segment(b"package", b"util")
            .segment(b"module", b"text")
            .build();
        assert_eq!(g.root(), &expected);
    }

    #[test]
    fn extract_module_root_is_filename_only() {
        let g = extract_default("foo.py", "", &make_anchor(), false);
        let expected = MonikerBuilder::new()
            .project(b"app")
            .segment(b"lang", b"python")
            .segment(b"module", b"foo")
            .build();
        assert_eq!(g.root(), &expected);
    }

    #[test]
    fn extract_function_with_typed_params_emits_full_signature() {
        let src = "def make(x: int, y: str) -> int:\n return x\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let f = g
            .defs()
            .find(|d| d.kind == b"function")
            .expect("function def");
        let last = f.moniker.as_view().segments().last().unwrap();
        assert_eq!(last.kind, b"function");
        assert_eq!(last.name, b"make(x:int,y:str)");
        assert_eq!(f.signature, b"x:int,y:str".to_vec());
    }

    #[test]
    fn extract_function_with_untyped_params_uses_name_only_slots() {
        let src = "def f(a, b=1):\n return a\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let f = g
            .defs()
            .find(|d| d.kind == b"function")
            .expect("function def");
        let last = f.moniker.as_view().segments().last().unwrap();
        assert_eq!(last.name, b"f(a,b)");
        assert_eq!(f.signature, b"a,b".to_vec());
    }

    #[test]
    fn extract_classmethod_excludes_cls_from_signature() {
        // The method body is indented one level deeper than its `def`;
        // a body at the same column as `def` is not valid Python.
        let src = "class Foo:\n @classmethod\n def make(cls, x: int) -> 'Foo':\n  return cls()\n";
        let g = extract_default("foo.py", src, &make_anchor(), false);
        let m = g.defs().find(|d| d.kind == b"method").expect("method def");
        assert_eq!(
            m.moniker.as_view().segments().last().unwrap().name,
            b"make(x:int)"
        );
    }

    #[test]
    fn extract_double_underscore_visibility_is_private() {
        // The method body is indented one level deeper than its `def`;
        // a body at the same column as `def` is not valid Python.
        let src = "class Foo:\n def __secret(self):\n  pass\n";
        let g = extract_default("foo.py", src, &make_anchor(), false);
        let m = g.defs().find(|d| d.kind == b"method").expect("method def");
        assert_eq!(m.visibility, b"private".to_vec());
    }

    #[test]
    fn extract_single_underscore_visibility_is_module() {
        let src = "def _internal():\n pass\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let f = g
            .defs()
            .find(|d| d.kind == b"function")
            .expect("function def");
        assert_eq!(f.visibility, b"module".to_vec());
    }

    #[test]
    fn extract_import_module_emits_imports_module() {
        let src = "import os\nimport acme.util as u\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let kinds: Vec<&[u8]> = g.refs().map(|r| r.kind.as_slice()).collect();
        assert_eq!(kinds.iter().filter(|k| **k == b"imports_module").count(), 2);
    }

    #[test]
    fn extract_stdlib_import_marks_external() {
        let g = extract_default("m.py", "import json\n", &make_anchor(), false);
        let r = g
            .refs()
            .find(|r| r.kind == b"imports_module")
            .expect("imports_module");
        assert_eq!(r.confidence, b"external".to_vec());
    }

    #[test]
    fn extract_project_import_marks_imported() {
        let g = extract_default("m.py", "import acme.util\n", &make_anchor(), false);
        let r = g
            .refs()
            .find(|r| r.kind == b"imports_module")
            .expect("imports_module");
        assert_eq!(r.confidence, b"imported".to_vec());
    }

    #[test]
    fn extract_from_import_emits_one_imports_symbol_per_name() {
        let src = "from acme.util import a, b as c\n";
        let g = extract_default("m.py", src, &make_anchor(), false);

        // One `imports_symbol` ref per imported name, keyed by the original
        // (un-aliased) name.
        let names: Vec<&[u8]> = g
            .refs()
            .filter(|r| r.kind == b"imports_symbol")
            .map(|r| r.target.as_view().segments().last().unwrap().name)
            .collect();
        assert_eq!(names, vec![&b"a"[..], &b"b"[..]]);

        // Segment kinds of the first symbol's target.
        let segs: Vec<_> = g
            .refs()
            .find(|r| r.kind == b"imports_symbol")
            .unwrap()
            .target
            .as_view()
            .segments()
            .collect();
        let kinds: Vec<&[u8]> = segs.iter().map(|s| s.kind).collect();
        assert_eq!(
            kinds,
            vec![&b"lang"[..], &b"package"[..], &b"module"[..], &b"path"[..]]
        );

        let aliased = g
            .refs()
            .find(|r| r.kind == b"imports_symbol" && r.alias == b"c")
            .expect("aliased import");
        // NOTE(review): this assertion is already implied by the `find`
        // predicate above; the `expect` is what actually guards the alias.
        assert_eq!(aliased.alias, b"c".to_vec());
    }

    #[test]
    fn extract_relative_import_resolves_against_importer() {
        let src = "from .util import helper\n";
        let g = extract_default("acme/m.py", src, &make_anchor(), false);
        let r = g
            .refs()
            .find(|r| r.kind == b"imports_symbol")
            .expect("imports_symbol");
        let segs: Vec<_> = r.target.as_view().segments().collect();
        let kinds: Vec<&[u8]> = segs.iter().map(|s| s.kind).collect();
        let names: Vec<&[u8]> = segs.iter().map(|s| s.name).collect();
        assert_eq!(
            kinds,
            vec![&b"lang"[..], &b"package"[..], &b"module"[..], &b"path"[..]]
        );
        assert_eq!(
            names,
            vec![&b"python"[..], &b"acme"[..], &b"util"[..], &b"helper"[..]]
        );
    }

    #[test]
    fn extract_relative_import_underflow_falls_back_to_external_pkg() {
        // Three leading dots from a top-level module climb above the root.
        let src = "from ...foo import bar\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let r = g
            .refs()
            .find(|r| r.kind == b"imports_symbol")
            .expect("imports_symbol");
        let segs: Vec<_> = r.target.as_view().segments().collect();
        assert_eq!(segs[0].kind, b"external_pkg");
        assert_eq!(segs[0].name, b"...");
    }

    #[test]
    fn extract_decorator_emits_annotates() {
        let src = "import functools\n@functools.wraps(fn)\ndef g():\n pass\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let ann = g
            .refs()
            .find(|r| r.kind == b"annotates")
            .expect("annotates ref");
        assert_eq!(
            ann.target.as_view().segments().last().unwrap().name,
            b"wraps"
        );
    }

    #[test]
    fn extract_param_read_marks_confidence_local() {
        let src = "def f(x):\n return x\n";
        let g = extract_default("m.py", src, &make_anchor(), true);
        let r = g
            .refs()
            .find(|r| {
                r.kind == b"reads" && r.target.as_view().segments().last().unwrap().name == b"x"
            })
            .expect("reads x");
        assert_eq!(r.confidence, b"local".to_vec());
    }

    #[test]
    fn extract_deep_emits_param_def() {
        let src = "def f(x, y: int):\n return x\n";
        let g = extract_default("m.py", src, &make_anchor(), true);
        let params: Vec<&[u8]> = g
            .defs()
            .filter(|d| d.kind == b"param")
            .map(|d| d.moniker.as_view().segments().last().unwrap().name)
            .collect();
        assert!(params.contains(&&b"x"[..]));
        assert!(params.contains(&&b"y"[..]));
    }

    #[test]
    fn extract_function_docstring_emits_comment_def_parented_on_function() {
        let src = "def f():\n \"\"\"docstring\"\"\"\n return 0\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        let fn_moniker = MonikerBuilder::new()
            .project(b"app")
            .segment(b"lang", b"python")
            .segment(b"module", b"m")
            .segment(b"function", b"f()")
            .build();
        // Count comment defs whose parent index resolves to the function def.
        let docstring_count = g
            .defs()
            .filter(|d| d.kind == b"comment")
            .filter(|d| {
                d.parent
                    .and_then(|i| g.defs().nth(i))
                    .is_some_and(|p| p.moniker == fn_moniker)
            })
            .count();
        assert_eq!(
            docstring_count,
            1,
            "function docstring must emit one comment def parented on the function. defs: {:?}",
            g.def_monikers()
        );
    }

    #[test]
    fn extract_class_and_module_docstring_each_emit_one_comment() {
        let src = "\"\"\"module doc\"\"\"\nclass A:\n \"\"\"class doc\"\"\"\n pass\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        assert_eq!(
            g.defs().filter(|d| d.kind == b"comment").count(),
            2,
            "module-level and class docstrings should each yield one comment def. defs: {:?}",
            g.def_monikers()
        );
    }

    #[test]
    fn extract_non_docstring_string_at_start_is_not_a_comment() {
        let src = "x = \"hello\"\n";
        let g = extract_default("m.py", src, &make_anchor(), false);
        assert_eq!(
            g.defs().filter(|d| d.kind == b"comment").count(),
            0,
            "string literals that aren't bare expression-statement-strings must NOT be treated as docstrings"
        );
    }
}