harper_python/
lib.rs

1use harper_core::parsers::{self, Parser, PlainEnglish};
2use harper_core::{Token, TokenKind};
3use harper_tree_sitter::TreeSitterMasker;
4use tree_sitter::Node;
5
6pub struct PythonParser {
7    /// Used to grab the text nodes.
8    inner: parsers::Mask<TreeSitterMasker, PlainEnglish>,
9}
10
11impl PythonParser {
12    fn node_condition(n: &Node) -> bool {
13        if n.kind().contains("comment") {
14            return true;
15        }
16        if n.kind() == "string_content"
17            && let Some(expr_stmt) = parent_is_expression_statement(n)
18            && (is_module_level_docstring(&expr_stmt)
19                || is_fn_or_class_docstrings(&expr_stmt)
20                || is_attribute_docstring(&expr_stmt))
21        {
22            return true;
23        }
24        false
25    }
26}
27
28impl Default for PythonParser {
29    fn default() -> Self {
30        Self {
31            inner: parsers::Mask::new(
32                TreeSitterMasker::new(tree_sitter_python::LANGUAGE.into(), Self::node_condition),
33                PlainEnglish,
34            ),
35        }
36    }
37}
38
39impl Parser for PythonParser {
40    fn parse(&self, source: &[char]) -> Vec<Token> {
41        let mut tokens = self.inner.parse(source);
42
43        let mut prev_kind: Option<&TokenKind> = None;
44
45        for token in &mut tokens {
46            if let TokenKind::Space(v) = &mut token.kind {
47                if let Some(TokenKind::Newline(_)) = &prev_kind {
48                    // Lines in multiline docstrings are indented with spaces to match the current level.
49                    // We need to remove such spaces to avoid triggering French spaces rule.
50                    *v = 0;
51                } else {
52                    *v = (*v).clamp(0, 1);
53                }
54            }
55
56            prev_kind = Some(&token.kind);
57        }
58
59        tokens
60    }
61}
62
63fn parent_is_expression_statement<'a>(node: &Node<'a>) -> Option<Node<'a>> {
64    node.parent()
65        .filter(|n| n.kind() == "string")
66        .and_then(|string_node| string_node.parent())
67        .filter(|n| n.kind() == "expression_statement")
68}
69
70#[inline]
71fn is_module_level_docstring(expr_stmt: &Node) -> bool {
72    // (module . (expression_statement (string)))
73    expr_stmt.parent().is_some_and(|n| n.kind() == "module")
74}
75
76#[inline]
77fn is_fn_or_class_docstrings(expr_stmt: &Node) -> bool {
78    // (class/func_definition body: (block . (expression_statement (string))))
79    expr_stmt
80        .parent()
81        .filter(|n| n.kind() == "block")
82        .and_then(|n| n.parent())
83        .is_some_and(|n| n.kind() == "function_definition" || n.kind() == "class_definition")
84}
85
86#[inline]
87fn is_attribute_docstring(expr_stmt: &Node) -> bool {
88    // ((expression_statement (assignment)) . (expression_statement (string)))
89    expr_stmt
90        .prev_sibling()
91        .filter(|s| s.kind() == "expression_statement")
92        .and_then(|s| s.child(0))
93        .is_some_and(|c| c.kind() == "assignment")
94}