1use harper_core::parsers::{self, Parser, PlainEnglish};
2use harper_core::{Token, TokenKind};
3use harper_tree_sitter::TreeSitterMasker;
4use tree_sitter::Node;
5
6pub struct PythonParser {
7 inner: parsers::Mask<TreeSitterMasker, PlainEnglish>,
9}
10
11impl PythonParser {
12 fn node_condition(n: &Node) -> bool {
13 if n.kind().contains("comment") {
14 return true;
15 }
16 if n.kind() == "string_content"
17 && let Some(expr_stmt) = parent_is_expression_statement(n)
18 && (is_module_level_docstring(&expr_stmt)
19 || is_fn_or_class_docstrings(&expr_stmt)
20 || is_attribute_docstring(&expr_stmt))
21 {
22 return true;
23 }
24 false
25 }
26}
27
28impl Default for PythonParser {
29 fn default() -> Self {
30 Self {
31 inner: parsers::Mask::new(
32 TreeSitterMasker::new(tree_sitter_python::LANGUAGE.into(), Self::node_condition),
33 PlainEnglish,
34 ),
35 }
36 }
37}
38
39impl Parser for PythonParser {
40 fn parse(&self, source: &[char]) -> Vec<Token> {
41 let mut tokens = self.inner.parse(source);
42
43 let mut prev_kind: Option<&TokenKind> = None;
44
45 for token in &mut tokens {
46 if let TokenKind::Space(v) = &mut token.kind {
47 if let Some(TokenKind::Newline(_)) = &prev_kind {
48 *v = 0;
51 } else {
52 *v = (*v).clamp(0, 1);
53 }
54 }
55
56 prev_kind = Some(&token.kind);
57 }
58
59 tokens
60 }
61}
62
63fn parent_is_expression_statement<'a>(node: &Node<'a>) -> Option<Node<'a>> {
64 node.parent()
65 .filter(|n| n.kind() == "string")
66 .and_then(|string_node| string_node.parent())
67 .filter(|n| n.kind() == "expression_statement")
68}
69
70#[inline]
71fn is_module_level_docstring(expr_stmt: &Node) -> bool {
72 expr_stmt.parent().is_some_and(|n| n.kind() == "module")
74}
75
76#[inline]
77fn is_fn_or_class_docstrings(expr_stmt: &Node) -> bool {
78 expr_stmt
80 .parent()
81 .filter(|n| n.kind() == "block")
82 .and_then(|n| n.parent())
83 .is_some_and(|n| n.kind() == "function_definition" || n.kind() == "class_definition")
84}
85
86#[inline]
87fn is_attribute_docstring(expr_stmt: &Node) -> bool {
88 expr_stmt
90 .prev_sibling()
91 .filter(|s| s.kind() == "expression_statement")
92 .and_then(|s| s.child(0))
93 .is_some_and(|c| c.kind() == "assignment")
94}