use harper_core::parsers::{self, Parser, PlainEnglish};
use harper_core::{Token, TokenKind};
use harper_tree_sitter::TreeSitterMasker;
use tree_sitter::Node;
/// A [`Parser`] for Python source files.
///
/// The `Default` implementation builds the wrapped parser from a
/// `TreeSitterMasker` configured with the tree-sitter Python grammar and
/// `PythonParser::node_condition`, paired with the `PlainEnglish` parser.
pub struct PythonParser {
/// Masking parser that `parse` delegates to before adjusting space tokens.
inner: parsers::Mask<TreeSitterMasker, PlainEnglish>,
}
impl PythonParser {
    /// Node predicate handed to the `TreeSitterMasker`.
    ///
    /// Returns `true` when `n` is either:
    /// * any node whose kind contains `"comment"`, or
    /// * a `string_content` node sitting in a string-only expression
    ///   statement that is recognised as a module-level, function/class, or
    ///   attribute docstring.
    fn node_condition(n: &Node) -> bool {
        let kind = n.kind();

        if kind.contains("comment") {
            return true;
        }

        // Only the textual content of a string literal can be a docstring body.
        if kind != "string_content" {
            return false;
        }

        parent_is_expression_statement(n).is_some_and(|stmt| {
            is_module_level_docstring(&stmt)
                || is_fn_or_class_docstrings(&stmt)
                || is_attribute_docstring(&stmt)
        })
    }
}
impl Default for PythonParser {
    /// Builds a parser from a tree-sitter masker for the Python grammar
    /// (using [`PythonParser::node_condition`] as its node predicate)
    /// combined with the [`PlainEnglish`] parser.
    fn default() -> Self {
        let masker =
            TreeSitterMasker::new(tree_sitter_python::LANGUAGE.into(), Self::node_condition);
        let inner = parsers::Mask::new(masker, PlainEnglish);

        Self { inner }
    }
}
impl Parser for PythonParser {
    /// Parses `source` with the inner masking parser, then normalises the
    /// width of every space token:
    ///
    /// * a space token directly following a newline token gets width `0`;
    /// * every other space token has its width clamped to at most `1`.
    ///
    /// NOTE(review): presumably this drops the indentation of continuation
    /// lines in multi-line docstrings/comments and collapses repeated
    /// spaces — confirm against the lint rules that consume these tokens.
    fn parse(&self, source: &[char]) -> Vec<Token> {
        let mut tokens = self.inner.parse(source);

        // Tracks whether the token handled in the previous iteration was a newline.
        let mut follows_newline = false;

        for token in tokens.iter_mut() {
            if let TokenKind::Space(width) = &mut token.kind {
                *width = if follows_newline {
                    0
                } else {
                    (*width).clamp(0, 1)
                };
            }

            follows_newline = matches!(token.kind, TokenKind::Newline(_));
        }

        tokens
    }
}
/// Walks two levels up from `node`: the parent must be a `string` node and
/// the grandparent an `expression_statement`.
///
/// Returns the `expression_statement` node when both checks hold, otherwise
/// `None`.
fn parent_is_expression_statement<'a>(node: &Node<'a>) -> Option<Node<'a>> {
    let string_node = node.parent()?;
    if string_node.kind() != "string" {
        return None;
    }

    let stmt = string_node.parent()?;
    (stmt.kind() == "expression_statement").then_some(stmt)
}
/// Returns `true` when the direct parent of `expr_stmt` is a `module` node.
#[inline]
fn is_module_level_docstring(expr_stmt: &Node) -> bool {
    matches!(expr_stmt.parent(), Some(parent) if parent.kind() == "module")
}
/// Returns `true` when `expr_stmt` lives directly inside a `block` whose own
/// parent is a `function_definition` or a `class_definition`.
#[inline]
fn is_fn_or_class_docstrings(expr_stmt: &Node) -> bool {
    let Some(block) = expr_stmt.parent() else {
        return false;
    };
    if block.kind() != "block" {
        return false;
    }

    block.parent().is_some_and(|owner| {
        matches!(owner.kind(), "function_definition" | "class_definition")
    })
}
/// Returns `true` when the sibling immediately preceding `expr_stmt` is an
/// `expression_statement` whose first child is an `assignment`.
#[inline]
fn is_attribute_docstring(expr_stmt: &Node) -> bool {
    match expr_stmt.prev_sibling() {
        Some(prev) if prev.kind() == "expression_statement" => prev
            .child(0)
            .is_some_and(|first| first.kind() == "assignment"),
        _ => false,
    }
}