1mod class;
2mod function;
3mod parameter;
4mod symbol;
5mod types;
6
7use dossier_core::tree_sitter::Node;
8use dossier_core::Result;
9
10use std::path::{Path, PathBuf};
11
12use class::Class;
13use function::Function;
14use symbol::{ParseSymbol, Symbol, SymbolContext};
15
/// Parser entry point for Python sources.
///
/// The type carries no state; all per-file state lives in `ParserContext`.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct PythonParser {}

impl PythonParser {
    /// Creates a parser. Equivalent to `PythonParser::default()`.
    pub fn new() -> Self {
        PythonParser {}
    }
}

/// Identifier string for the language handled by this crate.
pub const LANGUAGE: &str = "py";
26
27impl dossier_core::DocsParser for PythonParser {
28 fn parse<'a, P: Into<&'a Path>, T: IntoIterator<Item = P>>(
29 &self,
30 paths: T,
31 _ctx: &mut dossier_core::Context,
32 ) -> Result<Vec<dossier_core::Entity>> {
33 let mut symbols = vec![];
34
35 let paths: Vec<PathBuf> = paths
36 .into_iter()
37 .map(|p| p.into().to_owned())
38 .collect::<Vec<_>>();
39
40 paths.iter().for_each(|path| {
41 let code = std::fs::read_to_string(path).unwrap();
42 let ctx = ParserContext::new(path, &code);
43
44 let mut results = parse_file(ctx).unwrap();
46
47 symbols.append(&mut results);
48 });
49
50 let mut entities = vec![];
51 for symbol in symbols {
52 let entity = symbol.as_entity();
53 entities.push(entity);
54 }
55
56 Ok(entities)
57 }
58}
59
60fn init_parser() -> dossier_core::tree_sitter::Parser {
61 let mut parser = tree_sitter::Parser::new();
62 parser
63 .set_language(tree_sitter_python::language())
64 .expect("Error loading Python language");
65
66 parser
67}
68
69fn parse_file(mut ctx: ParserContext) -> Result<Vec<Symbol>> {
70 let mut parser = init_parser();
71 let tree = parser.parse(ctx.code, None).unwrap();
72
73 let mut cursor = tree.root_node().walk();
74 assert_eq!(cursor.node().kind(), "module");
75 cursor.goto_first_child();
76 let mut out = vec![];
77
78 loop {
79 handle_node(cursor.node(), &mut out, &mut ctx)?;
80
81 if !cursor.goto_next_sibling() {
82 break;
83 }
84 }
85
86 Ok(out)
87}
88
89fn handle_node(node: Node, out: &mut Vec<Symbol>, ctx: &mut ParserContext) -> Result<()> {
90 if Class::matches_node(node) {
91 out.push(Class::parse_symbol(node, ctx).unwrap());
92 } else if Function::matches_node(node) {
93 out.push(Function::parse_symbol(node, ctx).unwrap());
94 } else {
95 }
97
98 Ok(())
99}
100
101#[derive(Debug)]
102pub(crate) struct ParserContext<'a> {
103 pub file: &'a Path,
104 pub code: &'a str,
105 symbol_context: Vec<SymbolContext>,
106 fqn_parts: Vec<String>,
107}
108
109impl<'a> ParserContext<'a> {
110 pub fn new(file: &'a Path, code: &'a str) -> Self {
111 Self {
112 file,
113 code,
114 symbol_context: vec![],
115 fqn_parts: vec![],
116 }
117 }
118
119 pub fn file(&self) -> &Path {
120 self.file
121 }
122
123 pub fn code(&self) -> &str {
124 self.code
125 }
126
127 pub fn push_context(&mut self, ctx: SymbolContext) {
128 self.symbol_context.push(ctx)
129 }
130
131 pub fn pop_context(&mut self) -> Option<SymbolContext> {
132 self.symbol_context.pop()
133 }
134
135 pub fn symbol_context(&self) -> Option<SymbolContext> {
136 self.symbol_context.last().copied()
137 }
138
139 pub fn construct_fqn(&self, identifier: &str) -> String {
140 let mut out = vec![];
141 let file_part = format!("{}", self.file.display());
142 out.push(file_part.as_str());
143 for part in &self.fqn_parts {
144 out.push(part)
145 }
146 out.push(identifier);
147
148 out.join("::")
149 }
150
151 fn push_fqn(&mut self, identifier: &str) {
152 self.fqn_parts.push(identifier.to_owned());
153 }
154
155 fn pop_fqn(&mut self) -> Option<String> {
156 self.fqn_parts.pop()
157 }
158}
159
mod helpers {
    /// Triple-quote delimiters accepted as the start and end of a docstring.
    const DOCSTRING_DELIMITERS: [&str; 2] = ["\"\"\"", "'''"];

    /// Extracts and dedents the text of a triple-quoted Python docstring.
    ///
    /// `possible_docs` is the raw string literal, delimiters included.
    /// Returns `None` when it does not start with `"""` or `'''`; a literal
    /// with a string prefix such as `r"""` is therefore rejected.
    ///
    /// Processing steps:
    /// 1. The opening delimiter and, if present, the matching closing
    ///    delimiter are removed, and the remainder is trimmed.
    /// 2. The smallest indentation, measured in bytes of leading whitespace,
    ///    is taken over all non-blank lines after the first.
    /// 3. That indentation is removed from every line after the first. The
    ///    first line is kept as is, and blank lines become empty.
    pub(crate) fn process_docs(possible_docs: &str) -> Option<String> {
        let (delimiter, rest) = DOCSTRING_DELIMITERS.iter().copied().find_map(|delimiter| {
            possible_docs
                .strip_prefix(delimiter)
                .map(|rest| (delimiter, rest))
        })?;

        let body = rest.strip_suffix(delimiter).unwrap_or(rest).trim();

        let lines: Vec<&str> = body.lines().collect();

        // The first line follows the opening quotes directly, so its
        // indentation says nothing about the indentation of the block.
        let min_indent = lines
            .iter()
            .skip(1)
            .filter(|line| !line.trim().is_empty())
            .map(|line| line.len() - line.trim_start().len())
            .min()
            .unwrap_or(0);

        let dedented: Vec<&str> = lines
            .iter()
            .enumerate()
            .map(|(index, line)| {
                if index == 0 {
                    *line
                } else if line.trim().is_empty() {
                    ""
                } else {
                    // `get` returns `None` when `min_indent` is not a char
                    // boundary, which can happen when lines mix multi-byte
                    // whitespace. Strip all leading whitespace in that case
                    // rather than panic.
                    line.get(min_indent..)
                        .unwrap_or_else(|| line.trim_start())
                }
            })
            .collect();

        Some(dedented.join("\n"))
    }
}
203
// Unit tests that run `parse_file` on small in-memory Python snippets and
// check the title and documentation of the first symbol returned.
204#[cfg(test)]
205mod test {
206 use super::*;
207 use indoc::indoc;
208
// A class whose body is only a multi-line docstring.
209 #[test]
210 fn parses_a_class() {
211 let source = indoc! {r#"
212 class PyClass:
213 """Documentation for a class.
214
215 More details.
216 Some other stuff!
217 """
218 "#};
219
// The path is only used to build the context; the source is never read from disk here.
220 let ctx = ParserContext::new(Path::new("main.py"), source);
221 let symbols = parse_file(ctx).unwrap();
222
// The first symbol must be a class carrying the expected title and documentation.
223 let class = symbols.first().unwrap().as_class().unwrap();
224 assert_eq!(class.title, "PyClass");
225 assert_eq!(
226 class.documentation.as_deref(),
227 Some("Documentation for a class.\n\nMore details.\n Some other stuff!")
228 );
229 }
230
// A function with default-valued parameters, a docstring on its own lines, and a body.
231 #[test]
232 fn parses_a_function() {
233 let source = indoc! {r#"
234 def complex(real=0.0, imag=0.0):
235 """
236 Form a complex number.
237 """
238 if imag == 0.0 and real == 0.0:
239 return complex_zero
240 "#};
241
242 let ctx = ParserContext::new(Path::new("main.py"), source);
243 let symbols = parse_file(ctx).unwrap();
244
// The expected documentation has no surrounding newlines or indentation.
245 let function = symbols.first().unwrap().as_function().unwrap();
246 assert_eq!(function.title, "complex");
247 assert_eq!(
248 function.documentation.as_deref(),
249 Some("Form a complex number.")
250 );
251 }
252}