dossier_py/
lib.rs

1mod class;
2mod function;
3mod parameter;
4mod symbol;
5mod types;
6
7use dossier_core::tree_sitter::Node;
8use dossier_core::Result;
9
10use std::path::{Path, PathBuf};
11
12use class::Class;
13use function::Function;
14use symbol::{ParseSymbol, Symbol, SymbolContext};
15
/// Documentation parser for Python source files.
///
/// The type holds no state of its own; it exists so the
/// `dossier_core::DocsParser` trait can be implemented on it.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct PythonParser {}

impl PythonParser {
    /// Creates a parser. Equivalent to `PythonParser::default()`.
    pub fn new() -> Self {
        PythonParser {}
    }
}

/// Short identifier for the language handled by this crate.
pub const LANGUAGE: &str = "py";
26
27impl dossier_core::DocsParser for PythonParser {
28    fn parse<'a, P: Into<&'a Path>, T: IntoIterator<Item = P>>(
29        &self,
30        paths: T,
31        _ctx: &mut dossier_core::Context,
32    ) -> Result<Vec<dossier_core::Entity>> {
33        let mut symbols = vec![];
34
35        let paths: Vec<PathBuf> = paths
36            .into_iter()
37            .map(|p| p.into().to_owned())
38            .collect::<Vec<_>>();
39
40        paths.iter().for_each(|path| {
41            let code = std::fs::read_to_string(path).unwrap();
42            let ctx = ParserContext::new(path, &code);
43
44            // TODO(Nik): Handle error
45            let mut results = parse_file(ctx).unwrap();
46
47            symbols.append(&mut results);
48        });
49
50        let mut entities = vec![];
51        for symbol in symbols {
52            let entity = symbol.as_entity();
53            entities.push(entity);
54        }
55
56        Ok(entities)
57    }
58}
59
60fn init_parser() -> dossier_core::tree_sitter::Parser {
61    let mut parser = tree_sitter::Parser::new();
62    parser
63        .set_language(tree_sitter_python::language())
64        .expect("Error loading Python language");
65
66    parser
67}
68
69fn parse_file(mut ctx: ParserContext) -> Result<Vec<Symbol>> {
70    let mut parser = init_parser();
71    let tree = parser.parse(ctx.code, None).unwrap();
72
73    let mut cursor = tree.root_node().walk();
74    assert_eq!(cursor.node().kind(), "module");
75    cursor.goto_first_child();
76    let mut out = vec![];
77
78    loop {
79        handle_node(cursor.node(), &mut out, &mut ctx)?;
80
81        if !cursor.goto_next_sibling() {
82            break;
83        }
84    }
85
86    Ok(out)
87}
88
89fn handle_node(node: Node, out: &mut Vec<Symbol>, ctx: &mut ParserContext) -> Result<()> {
90    if Class::matches_node(node) {
91        out.push(Class::parse_symbol(node, ctx).unwrap());
92    } else if Function::matches_node(node) {
93        out.push(Function::parse_symbol(node, ctx).unwrap());
94    } else {
95        // println!("Unhandled node: {}", node.kind());
96    }
97
98    Ok(())
99}
100
101#[derive(Debug)]
102pub(crate) struct ParserContext<'a> {
103    pub file: &'a Path,
104    pub code: &'a str,
105    symbol_context: Vec<SymbolContext>,
106    fqn_parts: Vec<String>,
107}
108
109impl<'a> ParserContext<'a> {
110    pub fn new(file: &'a Path, code: &'a str) -> Self {
111        Self {
112            file,
113            code,
114            symbol_context: vec![],
115            fqn_parts: vec![],
116        }
117    }
118
119    pub fn file(&self) -> &Path {
120        self.file
121    }
122
123    pub fn code(&self) -> &str {
124        self.code
125    }
126
127    pub fn push_context(&mut self, ctx: SymbolContext) {
128        self.symbol_context.push(ctx)
129    }
130
131    pub fn pop_context(&mut self) -> Option<SymbolContext> {
132        self.symbol_context.pop()
133    }
134
135    pub fn symbol_context(&self) -> Option<SymbolContext> {
136        self.symbol_context.last().copied()
137    }
138
139    pub fn construct_fqn(&self, identifier: &str) -> String {
140        let mut out = vec![];
141        let file_part = format!("{}", self.file.display());
142        out.push(file_part.as_str());
143        for part in &self.fqn_parts {
144            out.push(part)
145        }
146        out.push(identifier);
147
148        out.join("::")
149    }
150
151    fn push_fqn(&mut self, identifier: &str) {
152        self.fqn_parts.push(identifier.to_owned());
153    }
154
155    fn pop_fqn(&mut self) -> Option<String> {
156        self.fqn_parts.pop()
157    }
158}
159
160mod helpers {
161    pub(crate) fn process_docs(possible_docs: &str) -> Option<String> {
162        if !possible_docs.starts_with("\"\"\"") {
163            return None;
164        }
165
166        // Remove the triple quotes from the start and end of the docstring
167        let trimmed_docstring = possible_docs
168            .trim_start_matches("\"\"\"")
169            .trim_end_matches("\"\"\"")
170            .trim();
171
172        // Split the trimmed docstring into lines
173        let lines: Vec<&str> = trimmed_docstring.lines().collect();
174
175        // Find the minimum indentation starting from the second line
176        let min_indent = lines
177            .iter()
178            .skip(1)
179            .filter(|line| !line.trim().is_empty())
180            .map(|line| line.len() - line.trim_start().len())
181            .min()
182            .unwrap_or(0);
183
184        // Process each line, removing the minimum indentation from lines other than the first
185        let parsed = lines
186            .iter()
187            .enumerate()
188            .map(|(i, line)| {
189                if i == 0 {
190                    *line
191                } else if line.len() > min_indent {
192                    &line[min_indent..]
193                } else {
194                    line.trim()
195                }
196            })
197            .collect::<Vec<&str>>()
198            .join("\n");
199
200        Some(parsed)
201    }
202}
203
#[cfg(test)]
mod test {
    use super::*;
    use indoc::indoc;

    /// A class is reported with its name and its de-indented docstring.
    #[test]
    fn parses_a_class() {
        let python = indoc! {r#"
        class PyClass:
            """Documentation for a class.

            More details.
                Some other stuff!
            """
        "#};

        let parsed = parse_file(ParserContext::new(Path::new("main.py"), python)).unwrap();
        let first = parsed.first().unwrap();
        let class = first.as_class().unwrap();

        assert_eq!(class.title, "PyClass");

        let expected_docs = "Documentation for a class.\n\nMore details.\n    Some other stuff!";
        assert_eq!(class.documentation.as_deref(), Some(expected_docs));
    }

    /// A function is reported with its name and its trimmed docstring.
    #[test]
    fn parses_a_function() {
        let python = indoc! {r#"
        def complex(real=0.0, imag=0.0):
            """
            Form a complex number.
            """
            if imag == 0.0 and real == 0.0:
                return complex_zero
        "#};

        let parsed = parse_file(ParserContext::new(Path::new("main.py"), python)).unwrap();
        let first = parsed.first().unwrap();
        let function = first.as_function().unwrap();

        assert_eq!(function.title, "complex");

        let expected_docs = "Form a complex number.";
        assert_eq!(function.documentation.as_deref(), Some(expected_docs));
    }
}