use std::{fmt, path::Path};
use tree_sitter::{Language, Parser};
/// Reusable Perl parser that owns a single tree-sitter [`Parser`].
///
/// Build one with [`PerlParser::new`] and call its `parse_*` methods as many
/// times as needed; the same underlying parser is used for every call.
#[non_exhaustive]
pub struct PerlParser {
    /// Underlying tree-sitter parser, configured by `try_create_parser`.
    parser: Parser,
}
/// Typed error returned by the `try_*` parsing functions in this module.
#[derive(Debug)]
#[non_exhaustive]
pub enum ParsePerlError {
    /// Assigning the Perl grammar to the parser was rejected by tree-sitter.
    LanguageSetup(tree_sitter::LanguageError),
    /// `Parser::parse` returned `None` instead of a syntax tree.
    ParseReturnedNone,
    /// Reading the source file from disk failed.
    Io(std::io::Error),
}
/// User-facing, single-line messages for every [`ParsePerlError`] variant.
impl fmt::Display for ParsePerlError {
    /// Writes a short description of the failure to `f`.
    ///
    /// Wrapped errors are rendered through their own `Display` implementation
    /// (not `Debug`), so both wrapping variants produce readable text in the
    /// same style.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::LanguageSetup(error) => {
                // `LanguageError` is exposed as `dyn Error` by `source()`, so
                // it is guaranteed to implement `Display`.
                write!(f, "failed to configure parser language: {error}")
            }
            Self::ParseReturnedNone => f.write_str("tree-sitter returned no parse tree"),
            Self::Io(error) => write!(f, "failed to read Perl source file: {error}"),
        }
    }
}
/// Exposes the wrapped error, when there is one, as the failure's cause.
impl std::error::Error for ParsePerlError {
    /// Returns the underlying error for the wrapping variants and `None` for
    /// [`ParsePerlError::ParseReturnedNone`], which carries no cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match self {
            Self::ParseReturnedNone => return None,
            Self::LanguageSetup(inner) => inner,
            Self::Io(inner) => inner,
        };
        Some(cause)
    }
}
impl From<tree_sitter::LanguageError> for ParsePerlError {
fn from(value: tree_sitter::LanguageError) -> Self {
Self::LanguageSetup(value)
}
}
impl From<std::io::Error> for ParsePerlError {
fn from(value: std::io::Error) -> Self {
Self::Io(value)
}
}
/// Returns the tree-sitter [`Language`] for Perl.
///
/// The value comes straight from the foreign `tree_sitter_perl` function
/// declared in this file.
pub fn language() -> Language {
    // SAFETY: `tree_sitter_perl` is declared in this file as an argument-less
    // `extern "C"` function returning `Language`. Soundness relies on the
    // linked grammar exporting a symbol with exactly that signature —
    // NOTE(review): confirm against the build script that compiles the grammar.
    unsafe { tree_sitter_perl() }
}
/// Creates a tree-sitter [`Parser`] with the Perl grammar assigned.
///
/// # Errors
///
/// Returns the [`tree_sitter::LanguageError`] reported by
/// `Parser::set_language` if the grammar is rejected.
pub fn try_create_parser() -> Result<Parser, tree_sitter::LanguageError> {
    let mut parser = Parser::new();
    // Hand the parser back only once the grammar has been accepted.
    parser.set_language(&language()).map(|()| parser)
}
impl PerlParser {
    /// Builds a parser that is already configured for Perl.
    ///
    /// # Errors
    ///
    /// Propagates the [`tree_sitter::LanguageError`] from `try_create_parser`.
    pub fn new() -> Result<Self, tree_sitter::LanguageError> {
        try_create_parser().map(|parser| Self { parser })
    }

    /// Parses raw source bytes with the owned parser.
    ///
    /// # Errors
    ///
    /// Returns [`ParsePerlError::ParseReturnedNone`] when tree-sitter yields
    /// no tree.
    pub fn parse_bytes(&mut self, code: &[u8]) -> Result<tree_sitter::Tree, ParsePerlError> {
        let Self { parser } = self;
        try_parse_with_parser(parser, code)
    }

    /// Parses a string slice by forwarding its bytes to [`Self::parse_bytes`].
    ///
    /// # Errors
    ///
    /// Same as [`Self::parse_bytes`].
    pub fn parse_code(&mut self, code: &str) -> Result<tree_sitter::Tree, ParsePerlError> {
        let bytes = code.as_bytes();
        self.parse_bytes(bytes)
    }
}
/// Runs `parser` over `code` without a previous tree and converts a missing
/// result into a typed error.
///
/// The tests in this file show that a parser with no language assigned hits
/// the `None` path.
fn try_parse_with_parser(
    parser: &mut Parser,
    code: &[u8],
) -> Result<tree_sitter::Tree, ParsePerlError> {
    parser.parse(code, None).ok_or(ParsePerlError::ParseReturnedNone)
}
/// Creates a tree-sitter [`Parser`] and attempts to assign the Perl grammar.
///
/// This function cannot report failure: an error from `set_language` is
/// discarded and the parser is returned regardless. Use [`try_create_parser`]
/// when the outcome of the language setup matters.
pub fn create_parser() -> Parser {
    let mut parser = Parser::new();
    let grammar = language();
    // Best effort: the signature has no way to surface a setup error.
    let _ = parser.set_language(&grammar);
    parser
}
/// Parses Perl source bytes with a freshly created parser.
///
/// Boxed-error wrapper around [`try_parse_perl_bytes`].
///
/// # Errors
///
/// Returns the [`ParsePerlError`] from [`try_parse_perl_bytes`], boxed.
pub fn parse_perl_bytes(code: &[u8]) -> Result<tree_sitter::Tree, Box<dyn std::error::Error>> {
    let tree = try_parse_perl_bytes(code)?;
    Ok(tree)
}
/// Parses Perl source bytes with a freshly created parser, returning a typed
/// error.
///
/// # Errors
///
/// * [`ParsePerlError::LanguageSetup`] if the parser cannot be configured.
/// * [`ParsePerlError::ParseReturnedNone`] if tree-sitter yields no tree.
pub fn try_parse_perl_bytes(code: &[u8]) -> Result<tree_sitter::Tree, ParsePerlError> {
    // `?` maps the language error to `LanguageSetup` through the `From` impl.
    let mut fresh_parser = try_create_parser()?;
    try_parse_with_parser(&mut fresh_parser, code)
}
/// Parses Perl source bytes with a caller-supplied parser.
///
/// Boxed-error wrapper around [`try_parse_perl_bytes_with_parser`].
///
/// # Errors
///
/// Returns the [`ParsePerlError`] from the typed variant, boxed.
pub fn parse_perl_bytes_with_parser(
    parser: &mut Parser,
    code: &[u8],
) -> Result<tree_sitter::Tree, Box<dyn std::error::Error>> {
    let tree = try_parse_perl_bytes_with_parser(parser, code)?;
    Ok(tree)
}
/// Parses Perl source bytes with a caller-supplied parser, returning a typed
/// error.
///
/// The parser is used as given; this function does not assign a language.
///
/// # Errors
///
/// Returns [`ParsePerlError::ParseReturnedNone`] if tree-sitter yields no tree.
pub fn try_parse_perl_bytes_with_parser(
    parser: &mut Parser,
    code: &[u8],
) -> Result<tree_sitter::Tree, ParsePerlError> {
    // Public entry point for the private helper shared by every parse path.
    try_parse_with_parser(parser, code)
}
/// Parses a Perl source string with a freshly created parser.
///
/// Boxed-error wrapper around [`try_parse_perl_code`].
///
/// # Errors
///
/// Returns the [`ParsePerlError`] from [`try_parse_perl_code`], boxed.
pub fn parse_perl_code(code: &str) -> Result<tree_sitter::Tree, Box<dyn std::error::Error>> {
    let tree = try_parse_perl_code(code)?;
    Ok(tree)
}
/// Parses a Perl source string with a freshly created parser, returning a
/// typed error.
///
/// # Errors
///
/// Same as [`try_parse_perl_bytes`], to which the string's bytes are passed.
pub fn try_parse_perl_code(code: &str) -> Result<tree_sitter::Tree, ParsePerlError> {
    let bytes = code.as_bytes();
    try_parse_perl_bytes(bytes)
}
/// Parses a Perl source string with a caller-supplied parser.
///
/// Boxed-error wrapper around [`try_parse_perl_code_with_parser`].
///
/// # Errors
///
/// Returns the [`ParsePerlError`] from the typed variant, boxed.
pub fn parse_perl_code_with_parser(
    parser: &mut Parser,
    code: &str,
) -> Result<tree_sitter::Tree, Box<dyn std::error::Error>> {
    let tree = try_parse_perl_code_with_parser(parser, code)?;
    Ok(tree)
}
/// Parses a Perl source string with a caller-supplied parser, returning a
/// typed error.
///
/// # Errors
///
/// Same as [`try_parse_perl_bytes_with_parser`], which receives the string's
/// bytes.
pub fn try_parse_perl_code_with_parser(
    parser: &mut Parser,
    code: &str,
) -> Result<tree_sitter::Tree, ParsePerlError> {
    let bytes = code.as_bytes();
    try_parse_perl_bytes_with_parser(parser, bytes)
}
/// Reads the file at `path` and parses its contents as Perl.
///
/// Boxed-error wrapper around [`try_parse_perl_file`].
///
/// # Errors
///
/// Returns the [`ParsePerlError`] from [`try_parse_perl_file`], boxed.
pub fn parse_perl_file<P: AsRef<Path>>(
    path: P,
) -> Result<tree_sitter::Tree, Box<dyn std::error::Error>> {
    let tree = try_parse_perl_file(path)?;
    Ok(tree)
}
/// Reads the file at `path` as raw bytes and parses them as Perl, returning a
/// typed error.
///
/// # Errors
///
/// * [`ParsePerlError::Io`] if the file cannot be read.
/// * Any error from [`try_parse_perl_bytes`] for the parse itself.
pub fn try_parse_perl_file<P: AsRef<Path>>(path: P) -> Result<tree_sitter::Tree, ParsePerlError> {
    // `?` maps the I/O error to `Io` through the `From` impl.
    let source = std::fs::read(path)?;
    try_parse_perl_bytes(&source)
}
/// Returns the fixed scanner identifier string, `"c-scanner"`.
///
/// NOTE(review): presumably signals that the C scanner implementation is in
/// use — confirm with callers.
pub fn get_scanner_config() -> &'static str {
    const SCANNER_ID: &str = "c-scanner";
    SCANNER_ID
}
#[cfg(test)]
mod tests {
    use super::*;
    use tree_sitter::{Query, QueryCursor, StreamingIterator};

    /// Injection query shipped with the grammar, embedded at compile time.
    const INJECTIONS_QUERY: &str = include_str!("../../../tree-sitter-perl/queries/injections.scm");

    /// Result type shared by every fallible test in this module.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Resolves a capture to its `(capture name, matched source text)` pair.
    ///
    /// Yields `None` when the capture index has no name in `query` or when the
    /// node's text is not valid UTF-8.
    fn capture_text<'a>(
        query: &'a Query,
        code: &'a str,
        capture: tree_sitter::QueryCapture<'a>,
    ) -> Option<(&'a str, &'a str)> {
        let label = query.capture_names().get(capture.index as usize).copied()?;
        let snippet = capture.node.utf8_text(code.as_bytes()).ok()?;
        Some((label, snippet))
    }

    /// The linked grammar must expose at least one node kind.
    #[test]
    fn test_language_loading() {
        let kinds = language().node_kind_count();
        println!("C implementation node kind count: {}", kinds);
        assert!(kinds > 0);
    }

    /// A simple declaration parses from a string without error nodes.
    #[test]
    fn test_basic_parsing() -> TestResult {
        let tree = parse_perl_code("my $var = 'hello';")?;
        assert!(!tree.root_node().has_error());
        Ok(())
    }

    /// The same declaration parses from bytes without error nodes.
    #[test]
    fn test_parse_bytes() -> TestResult {
        let tree = parse_perl_bytes(b"my $var = 'hello';")?;
        assert!(!tree.root_node().has_error());
        Ok(())
    }

    /// One parser can be reused for several byte inputs.
    #[test]
    fn test_parse_bytes_with_reused_parser() -> TestResult {
        let mut parser = try_create_parser()?;
        let inputs: [&[u8]; 2] = [b"my $x = 1;", b"my $y = 2;"];
        for source in inputs {
            let tree = parse_perl_bytes_with_parser(&mut parser, source)?;
            assert!(!tree.root_node().has_error());
        }
        Ok(())
    }

    /// One parser can be reused for several string inputs.
    #[test]
    fn test_parse_code_with_reused_parser() -> TestResult {
        let mut parser = try_create_parser()?;
        for source in ["my $name = 'Perl';", "print $name;"] {
            let tree = parse_perl_code_with_parser(&mut parser, source)?;
            assert!(!tree.root_node().has_error());
        }
        Ok(())
    }

    /// A parser with no language assigned surfaces `ParseReturnedNone`.
    #[test]
    fn test_typed_parse_none_error_variant_is_emitted() {
        let mut unconfigured = Parser::new();
        let outcome = try_parse_with_parser(&mut unconfigured, b"my $var = 'hello';");
        assert!(matches!(outcome, Err(ParsePerlError::ParseReturnedNone)));
    }

    /// `From<LanguageError>` maps onto the `LanguageSetup` variant.
    #[test]
    fn test_typed_language_setup_error_variant_mapping() {
        let converted: ParsePerlError = tree_sitter::LanguageError::Version(0).into();
        assert!(matches!(converted, ParsePerlError::LanguageSetup(_)));
    }

    /// The infallible constructor still ends up with a language assigned.
    #[test]
    fn test_parser_creation() {
        let configured = create_parser();
        assert!(configured.language().is_some());
    }

    /// `PerlParser` yields clean trees for consecutive inputs.
    #[test]
    fn test_reusable_parser_parses_multiple_inputs() -> TestResult {
        let mut parser = PerlParser::new()?;
        // Both inputs are parsed before either tree is inspected.
        let trees = [parser.parse_code("my $x = 1;")?, parser.parse_code("my $y = 2;")?];
        for tree in &trees {
            assert!(!tree.root_node().has_error());
        }
        Ok(())
    }

    /// An erroneous parse must not affect the next parse on the same parser.
    #[test]
    fn test_reusable_parser_error_state_does_not_bleed() -> TestResult {
        let mut parser = PerlParser::new()?;

        let broken = parser.parse_code("my $x = @@@@@@;")?;
        assert!(broken.root_node().has_error(), "invalid Perl should produce error nodes");

        let clean = parser.parse_code("my $y = 42;")?;
        assert!(!clean.root_node().has_error(), "valid Perl after error parse must be clean");
        Ok(())
    }

    /// The injection query must capture the `Inline CPP` heredoc body.
    #[test]
    fn test_inline_cpp_injection_query_matches_heredoc_body() -> TestResult {
        let code = "use Inline CPP => <<'END_CPP';\n#include <string>\nclass Greet {};\nEND_CPP\n";
        let tree = parse_perl_code(code)?;
        let query = Query::new(&language(), INJECTIONS_QUERY)?;
        let mut cursor = QueryCursor::new();
        let mut matches = cursor.matches(&query, tree.root_node(), code.as_bytes());

        let mut matched = false;
        // `matches` is a streaming iterator, so it has to be advanced manually.
        while let Some(candidate) = matches.next() {
            let (mut package_ok, mut language_ok, mut content_ok) = (false, false, false);
            for capture in candidate.captures {
                let Some((name, text)) = capture_text(&query, code, *capture) else {
                    continue;
                };
                match name {
                    "inline.package" => package_ok = text == "Inline",
                    "inline.language" => language_ok = text == "CPP",
                    "injection.content" => {
                        content_ok = capture.node.kind() == "heredoc_content"
                            && text.contains("#include <string>");
                    }
                    _ => {}
                }
            }
            // A match counts only when all three captures agree.
            matched = package_ok && language_ok && content_ok;
            if matched {
                break;
            }
        }

        assert!(matched, "expected Inline::CPP heredoc to match the injection query");
        Ok(())
    }

    /// A leading UTF-8 byte-order mark still produces a `source_file` root.
    #[test]
    fn test_parse_bytes_with_utf8_bom_returns_tree() -> TestResult {
        let tree = parse_perl_bytes(b"\xEF\xBB\xBFmy $x = 1;")?;
        assert_eq!(tree.root_node().kind(), "source_file");
        Ok(())
    }

    /// Empty input still produces a `source_file` root.
    #[test]
    fn test_parse_bytes_empty_source() -> TestResult {
        let tree = parse_perl_bytes(b"")?;
        assert_eq!(tree.root_node().kind(), "source_file");
        Ok(())
    }
}
// Foreign declaration of the Perl grammar entry point, resolved at link time.
// NOTE(review): presumably provided by the compiled tree-sitter-perl C
// sources — confirm against the build script.
unsafe extern "C" {
// Returns the Perl grammar as a tree-sitter `Language`; wrapped by the safe
// `language()` function in this file. The signature must match the exported
// C symbol exactly.
fn tree_sitter_perl() -> Language;
}