use crate::BreakpointError;
use perl_parser::Parser;
use perl_parser::ast::{Node, NodeKind};
use ropey::Rope;
/// Reason attached to a breakpoint validation result.
///
/// The `Display` implementation yields the human-readable message that is
/// stored in [`BreakpointValidation::message`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ValidationReason {
    /// The target line contains only whitespace.
    BlankLine,
    /// The target line holds no code (its message also mentions blank lines).
    CommentLine,
    /// The target line lies inside the body of a heredoc.
    HeredocInterior,
    /// The target line lies inside POD documentation.
    PodLine,
    /// The requested line number is past the end of the file.
    LineOutOfRange,
    /// The source file could not be parsed.
    ParseError,
    /// The conditional breakpoint expression was rejected.
    InvalidCondition,
}

impl std::fmt::Display for ValidationReason {
    /// Writes the user-facing message for this reason.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = match self {
            Self::BlankLine => "Breakpoint set on blank line",
            Self::CommentLine => "Breakpoint set on comment or blank line",
            Self::HeredocInterior => "Breakpoint set inside heredoc content",
            Self::PodLine => "Breakpoint set inside POD documentation",
            Self::LineOutOfRange => "Line number exceeds file length",
            Self::ParseError => "Unable to parse source file",
            Self::InvalidCondition => "Conditional breakpoint expression is invalid",
        };
        f.write_str(text)
    }
}
/// Outcome of validating a single breakpoint request.
#[derive(Debug, Clone)]
pub struct BreakpointValidation {
    /// `true` when the breakpoint is accepted (possibly at an adjusted line).
    pub verified: bool,
    /// Line the result refers to; for adjusted results this is the new line.
    pub line: i64,
    /// Column passed through from the request, if any.
    pub column: Option<i64>,
    /// Why the breakpoint was rejected or adjusted; `None` for a plain accept.
    pub reason: Option<ValidationReason>,
    /// Human-readable explanation derived from `reason`, if any.
    pub message: Option<String>,
}

impl BreakpointValidation {
    /// Builds an accepted result with no reason and no message.
    pub fn verified(line: i64, column: Option<i64>) -> Self {
        Self {
            verified: true,
            line,
            column,
            reason: None,
            message: None,
        }
    }

    /// Builds a rejected result whose message is the reason's display text.
    pub fn rejected(line: i64, reason: ValidationReason) -> Self {
        Self {
            verified: false,
            line,
            column: None,
            message: Some(reason.to_string()),
            reason: Some(reason),
        }
    }

    /// Builds an accepted result that was moved to `new_line`.
    ///
    /// The message is the reason's display text followed by
    /// `", adjusted to line <new_line>"`.
    pub fn adjusted(new_line: i64, reason: ValidationReason) -> Self {
        Self {
            verified: true,
            line: new_line,
            column: None,
            message: Some(format!("{reason}, adjusted to line {new_line}")),
            reason: Some(reason),
        }
    }
}
/// Interface for checking whether a breakpoint request can be honoured.
///
/// Every validating method returns a [`BreakpointValidation`] describing the
/// outcome instead of failing.
pub trait BreakpointValidator {
/// Validates a breakpoint on `line` without column information.
fn validate(&self, line: i64) -> BreakpointValidation;
/// Validates a breakpoint on `line`, carrying the optional `column` along.
fn validate_with_column(&self, line: i64, column: Option<i64>) -> BreakpointValidation;
/// Returns `true` when a breakpoint on `line` would be accepted.
fn is_executable_line(&self, line: i64) -> bool;
/// Validates a conditional breakpoint: both the line and the `condition` text.
fn validate_condition(&self, line: i64, condition: &str) -> BreakpointValidation;
}
/// Byte span within the source text.
///
/// The containment check in this file treats it as half-open:
/// `start` is included, `end` is excluded.
#[derive(Debug, Clone, Copy)]
struct ByteRange {
// First byte covered by the range.
start: usize,
// One past the last byte covered by the range.
end: usize,
}
/// Breakpoint validator backed by a parsed AST of the source text.
///
/// Built by `AstBreakpointValidator::new`, which parses the source once and
/// precomputes the POD regions.
pub struct AstBreakpointValidator {
// Root node returned by the parser.
ast: Node,
// Rope over the same text, used to map line numbers to byte offsets.
rope: Rope,
// Owned copy of the text, sliced when inspecting the content of a line.
source: String,
// Byte ranges of POD blocks found by scanning the text line by line.
pod_regions: Vec<ByteRange>,
}
impl AstBreakpointValidator {
/// Parses `source` and prepares the lookup structures used for validation.
///
/// # Errors
///
/// Returns `BreakpointError::ParseError` carrying the debug rendering of the
/// parser error when `source` cannot be parsed.
pub fn new(source: &str) -> Result<Self, BreakpointError> {
    let mut parser = Parser::new(source);
    let ast = match parser.parse() {
        Ok(ast) => ast,
        Err(err) => return Err(BreakpointError::ParseError(format!("{err:?}"))),
    };
    Ok(Self {
        ast,
        rope: Rope::from_str(source),
        source: source.to_owned(),
        pod_regions: Self::find_pod_regions(source),
    })
}
/// Scans `source` line by line and returns the byte ranges of its POD blocks.
///
/// A block starts at the first byte of a line accepted by
/// `is_pod_directive` and ends at the end of the closing `=cut` line
/// (excluding its `\n`). A block that is never closed extends to the end of
/// `source`.
///
/// A line closes the block when it starts with `=cut` and the next character,
/// if any, is not an ASCII letter. This follows Perl, which also ends POD on
/// `=cut` followed by trailing whitespace or other text; requiring an exact
/// `=cut` match would mark all following code as documentation in that case.
fn find_pod_regions(source: &str) -> Vec<ByteRange> {
    let mut regions = Vec::new();
    let mut pod_start: Option<usize> = None;
    // Byte offset of the first character of the current line.
    let mut offset = 0;
    for line in source.split('\n') {
        // Tolerate CRLF line endings when classifying the line.
        let trimmed = line.trim_end_matches('\r');
        match pod_start {
            Some(start) => {
                let closes_pod = match trimmed.strip_prefix("=cut") {
                    Some(rest) => !rest.starts_with(|c: char| c.is_ascii_alphabetic()),
                    None => false,
                };
                if closes_pod {
                    regions.push(ByteRange { start, end: offset + line.len() });
                    pod_start = None;
                }
            }
            None if Self::is_pod_directive(trimmed) => pod_start = Some(offset),
            None => {}
        }
        // +1 accounts for the '\n' consumed by `split`.
        offset += line.len() + 1;
    }
    if let Some(start) = pod_start {
        regions.push(ByteRange { start, end: source.len() });
    }
    regions
}
/// Returns `true` when `line` begins with `=` immediately followed by an
/// ASCII letter (for example `=head1`, `=pod`, `=cut`).
fn is_pod_directive(line: &str) -> bool {
    let mut chars = line.chars();
    chars.next() == Some('=') && matches!(chars.next(), Some(c) if c.is_ascii_alphabetic())
}
/// Returns `true` when `byte_offset` falls inside any precomputed POD region
/// (start inclusive, end exclusive).
fn is_inside_pod_region(&self, byte_offset: usize) -> bool {
    self.pod_regions
        .iter()
        .any(|region| (region.start..region.end).contains(&byte_offset))
}
/// Maps a 1-based `line` number to its `(start, end)` byte offsets.
///
/// `end` is the start of the following line, or the total byte length for
/// the last line of the rope. Values of `line` below 1 are clamped to the
/// first line. Returns `None` when the line index is not below
/// `rope.len_lines()`.
fn line_byte_range(&self, line: i64) -> Option<(usize, usize)> {
    // `saturating_sub` avoids the overflow that `line - 1` hits for `i64::MIN`.
    let zero_based = line.saturating_sub(1).max(0);
    // Checked conversion: an `as` cast would truncate on 32-bit targets and
    // could alias a huge line number onto a real line.
    let line_idx = <usize as std::convert::TryFrom<i64>>::try_from(zero_based).ok()?;
    let total_lines = self.rope.len_lines();
    if line_idx >= total_lines {
        return None;
    }
    let line_start = self.rope.line_to_byte(line_idx);
    let line_end = if line_idx + 1 < total_lines {
        self.rope.line_to_byte(line_idx + 1)
    } else {
        self.rope.len_bytes()
    };
    Some((line_start, line_end))
}
/// Returns `true` when the bytes `line_start..line_end` hold no code.
///
/// The text is treated as code-free when it is whitespace only, when its
/// first non-whitespace character is `#`, or when the AST check in
/// `has_only_comments_in_range` reports that no statement overlaps the range.
fn is_comment_or_blank_line(&self, line_start: usize, line_end: usize) -> bool {
    let upper = line_end.min(self.source.len());
    let content = self.source[line_start..upper].trim_start();
    // An empty remainder means the whole line was whitespace.
    if content.is_empty() || content.starts_with('#') {
        return true;
    }
    self.has_only_comments_in_range(line_start, line_end)
}
/// Runs the range check against the root AST node.
///
/// Delegates to `has_only_comments_in_range_node` with `self.ast`.
fn has_only_comments_in_range(&self, start: usize, end: usize) -> bool {
self.has_only_comments_in_range_node(&self.ast, start, end)
}
/// Reports whether the byte range `start..end` is free of statements.
///
/// Returns `false` when `node` does not overlap the range at all, and for
/// every node kind other than `Program`. For a `Program` node it returns
/// `true` only if none of its statements overlaps the range.
fn has_only_comments_in_range_node(&self, node: &Node, start: usize, end: usize) -> bool {
    let node_overlaps = node.location.start < end && node.location.end > start;
    if !node_overlaps {
        return false;
    }
    if let NodeKind::Program { statements } = &node.kind {
        statements
            .iter()
            .all(|stmt| stmt.location.start >= end || stmt.location.end <= start)
    } else {
        false
    }
}
/// Returns `true` when `byte_offset` lies inside the body span of a heredoc.
///
/// Delegates to `is_inside_heredoc_interior_node`, starting at `self.ast`.
fn is_inside_heredoc_interior(&self, byte_offset: usize) -> bool {
self.is_inside_heredoc_interior_node(&self.ast, byte_offset)
}
/// Searches `node` and its descendants for a heredoc whose body span contains
/// `byte_offset` (span start inclusive, span end exclusive).
///
/// Heredoc nodes without a body span never match by themselves, but their
/// children are still visited.
// `self` is only threaded through the recursive calls, which clippy flags.
#[allow(clippy::only_used_in_recursion)]
fn is_inside_heredoc_interior_node(&self, node: &Node, byte_offset: usize) -> bool {
    match &node.kind {
        NodeKind::Heredoc { body_span: Some(span), .. }
            if byte_offset >= span.start && byte_offset < span.end =>
        {
            return true;
        }
        _ => {}
    }
    let mut hit = false;
    node.for_each_child(|child| {
        // Short-circuit: once a match is found, remaining children are not searched.
        hit = hit || self.is_inside_heredoc_interior_node(child, byte_offset);
    });
    hit
}
}
impl BreakpointValidator for AstBreakpointValidator {
/// Validates a breakpoint on `line` with no column information.
///
/// Delegates to `validate_with_column` with `column` set to `None`.
fn validate(&self, line: i64) -> BreakpointValidation {
self.validate_with_column(line, None)
}
/// Validates a breakpoint on `line`, passing `column` through on success.
///
/// Checks run in this order and the first failure wins: line out of range,
/// line start inside a heredoc body, line start inside a POD region, then
/// blank or comment-only line. A line that passes every check is verified.
fn validate_with_column(&self, line: i64, column: Option<i64>) -> BreakpointValidation {
    let (line_start, line_end) = match self.line_byte_range(line) {
        Some(range) => range,
        None => return BreakpointValidation::rejected(line, ValidationReason::LineOutOfRange),
    };

    let rejection = if self.is_inside_heredoc_interior(line_start) {
        Some(ValidationReason::HeredocInterior)
    } else if self.is_inside_pod_region(line_start) {
        Some(ValidationReason::PodLine)
    } else if self.is_comment_or_blank_line(line_start, line_end) {
        // Distinguish whitespace-only lines from other code-free lines.
        let text = &self.source[line_start..line_end.min(self.source.len())];
        Some(if text.trim().is_empty() {
            ValidationReason::BlankLine
        } else {
            ValidationReason::CommentLine
        })
    } else {
        None
    };

    match rejection {
        Some(reason) => BreakpointValidation::rejected(line, reason),
        None => BreakpointValidation::verified(line, column),
    }
}
/// Returns `true` when `validate(line)` yields a verified result.
fn is_executable_line(&self, line: i64) -> bool {
self.validate(line).verified
}
/// Validates a conditional breakpoint on `line`.
///
/// The line is validated first and its rejection, if any, is returned as is.
/// The condition is then rejected with `InvalidCondition` when it is empty
/// after trimming, when `condition_has_dangerous_construct` flags it, or when
/// it fails to parse once wrapped as `if (<condition>) { 1; }`.
/// A successful result carries no column.
fn validate_condition(&self, line: i64, condition: &str) -> BreakpointValidation {
    let line_check = self.validate(line);
    if !line_check.verified {
        return line_check;
    }

    let invalid = || BreakpointValidation::rejected(line, ValidationReason::InvalidCondition);

    let expr = condition.trim();
    if expr.is_empty() || Self::condition_has_dangerous_construct(expr) {
        return invalid();
    }

    // Parse the expression in a statement context to check its syntax.
    let probe = format!("if ({expr}) {{ 1; }}");
    let mut parser = Parser::new(&probe);
    if parser.parse().is_ok() {
        BreakpointValidation::verified(line, None)
    } else {
        invalid()
    }
}
}
impl AstBreakpointValidator {
/// Returns `true` when `condition` contains a construct that must not be
/// evaluated as a breakpoint condition.
///
/// This is a best-effort substring filter, not a sandbox. It flags:
/// - `system(`, `exec(`, `qx(`, `qx{`, `qx/` and any backtick;
/// - `unlink(`, `rename(`, `rmdir(`, `mkdir(`;
/// - any occurrence of `eval` followed (after optional whitespace) by `"`,
///   `'` or `$`, i.e. a string eval. Block evals (`eval { ... }`) are allowed.
///
/// Every occurrence of `eval` is inspected, so an earlier harmless match such
/// as `$evaluated` does not hide a later string eval.
fn condition_has_dangerous_construct(condition: &str) -> bool {
    const FORBIDDEN: &[&str] = &[
        "system(", "exec(", "qx(", "qx{", "qx/", "`", "unlink(", "rename(", "rmdir(", "mkdir(",
    ];
    if FORBIDDEN.iter().any(|needle| condition.contains(needle)) {
        return true;
    }

    condition.match_indices("eval").any(|(idx, matched)| {
        condition[idx + matched.len()..]
            .trim_start()
            .starts_with(|c: char| matches!(c, '"' | '\'' | '$'))
    })
}
}
// Unit tests for `AstBreakpointValidator`: line validation, POD region
// detection, conditional breakpoint validation and the POD helper functions.
#[cfg(test)]
mod tests {
use super::*;
use perl_tdd_support::must;
// --- Line validation ---
// A plain statement on line 1 is accepted without a reason.
#[test]
fn test_validate_executable_line() {
let source = "my $x = 1;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate(1);
assert!(result.verified);
assert_eq!(result.line, 1);
assert!(result.reason.is_none());
}
// A line starting with `#` is rejected as a comment line.
#[test]
fn test_validate_comment_line() {
let source = "# This is a comment\nmy $x = 1;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate(1);
assert!(!result.verified);
assert_eq!(result.reason, Some(ValidationReason::CommentLine));
}
// An empty line between two statements is rejected as blank.
#[test]
fn test_validate_blank_line() {
let source = "my $x = 1;\n\nmy $y = 2;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate(2);
assert!(!result.verified);
assert_eq!(result.reason, Some(ValidationReason::BlankLine));
}
// A line number far past the end of the file is out of range.
#[test]
fn test_validate_line_out_of_range() {
let source = "my $x = 1;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate(100);
assert!(!result.verified);
assert_eq!(result.reason, Some(ValidationReason::LineOutOfRange));
}
// Lines 1-4 are: comment, statement, blank, statement.
#[test]
fn test_is_executable_line() {
let source = "# comment\nmy $x = 1;\n\nmy $y = 2;\n";
let validator = must(AstBreakpointValidator::new(source));
assert!(!validator.is_executable_line(1)); assert!(validator.is_executable_line(2)); assert!(!validator.is_executable_line(3)); assert!(validator.is_executable_line(4)); }
// --- POD regions ---
// The directive line that opens a POD block (line 3) is rejected.
#[test]
fn test_pod_head1_line_rejected() {
let source = "my $x = 1;\n\n=head1 NAME\n\nSome pod text\n\n=cut\n\nmy $y = 2;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate(3);
assert!(!result.verified);
assert_eq!(result.reason, Some(ValidationReason::PodLine));
}
// Text inside the POD block (line 5) is rejected.
#[test]
fn test_pod_body_text_rejected() {
let source = "my $x = 1;\n\n=head1 NAME\n\nSome pod text\n\n=cut\n\nmy $y = 2;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate(5);
assert!(!result.verified);
assert_eq!(result.reason, Some(ValidationReason::PodLine));
}
// The closing `=cut` line (line 7) still counts as POD.
#[test]
fn test_pod_cut_line_rejected() {
let source = "my $x = 1;\n\n=head1 NAME\n\nSome pod text\n\n=cut\n\nmy $y = 2;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate(7);
assert!(!result.verified);
assert_eq!(result.reason, Some(ValidationReason::PodLine));
}
// Statements before and after the POD block remain executable.
#[test]
fn test_code_after_pod_is_executable() {
let source = "my $x = 1;\n\n=head1 NAME\n\nSome pod text\n\n=cut\n\nmy $y = 2;\n";
let validator = must(AstBreakpointValidator::new(source));
assert!(validator.is_executable_line(1));
assert!(validator.is_executable_line(9));
}
// Without a closing `=cut`, every line from `=pod` onwards is POD.
#[test]
fn test_pod_without_cut_extends_to_eof() {
let source = "my $x = 1;\n=pod\nThis is pod documentation\nThat never ends\n";
let validator = must(AstBreakpointValidator::new(source));
assert!(validator.is_executable_line(1));
let r2 = validator.validate(2);
assert!(!r2.verified);
assert_eq!(r2.reason, Some(ValidationReason::PodLine));
let r3 = validator.validate(3);
assert!(!r3.verified);
assert_eq!(r3.reason, Some(ValidationReason::PodLine));
let r4 = validator.validate(4);
assert!(!r4.verified);
assert_eq!(r4.reason, Some(ValidationReason::PodLine));
}
// Two separate POD blocks (lines 3-7 and 11-15) with code on lines 1, 9 and 17.
#[test]
fn test_multiple_pod_sections() {
let source = "my $a = 1;\n\n=head1 SYNOPSIS\n\nFirst section\n\n=cut\n\nmy $b = 2;\n\n=head2 METHODS\n\nSecond section\n\n=cut\n\nmy $c = 3;\n";
let validator = must(AstBreakpointValidator::new(source));
assert!(validator.is_executable_line(1)); assert_eq!(validator.validate(3).reason, Some(ValidationReason::PodLine)); assert_eq!(validator.validate(5).reason, Some(ValidationReason::PodLine)); assert_eq!(validator.validate(7).reason, Some(ValidationReason::PodLine)); assert!(validator.is_executable_line(9)); assert_eq!(validator.validate(11).reason, Some(ValidationReason::PodLine)); assert!(validator.is_executable_line(17)); }
// --- Conditional breakpoints: accepted expressions ---
// Numeric comparison is accepted.
#[test]
fn test_condition_valid_comparison() {
let source = "my $x = 1;\nmy $y = 2;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate_condition(1, "$x > 5");
assert!(result.verified);
}
// Numeric equality is accepted.
#[test]
fn test_condition_valid_equality() {
let source = "my $x = 1;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate_condition(1, "$x == 42");
assert!(result.verified);
}
// String equality with a quoted literal is accepted.
#[test]
fn test_condition_valid_string_eq() {
let source = "my $name = 'test';\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate_condition(1, "$name eq 'hello'");
assert!(result.verified);
}
// --- Conditional breakpoints: rejected expressions ---
// An empty condition is invalid.
#[test]
fn test_condition_empty_rejected() {
let source = "my $x = 1;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate_condition(1, "");
assert!(!result.verified);
assert_eq!(result.reason, Some(ValidationReason::InvalidCondition));
}
// A whitespace-only condition is invalid.
#[test]
fn test_condition_whitespace_only_rejected() {
let source = "my $x = 1;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate_condition(1, " ");
assert!(!result.verified);
assert_eq!(result.reason, Some(ValidationReason::InvalidCondition));
}
// `system(...)` is rejected.
#[test]
fn test_condition_system_call_rejected() {
let source = "my $x = 1;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate_condition(1, "system('rm -rf /')");
assert!(!result.verified);
assert_eq!(result.reason, Some(ValidationReason::InvalidCondition));
}
// Backtick command execution is rejected.
#[test]
fn test_condition_backtick_rejected() {
let source = "my $x = 1;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate_condition(1, "`ls`");
assert!(!result.verified);
assert_eq!(result.reason, Some(ValidationReason::InvalidCondition));
}
// `exec(...)` is rejected.
#[test]
fn test_condition_exec_rejected() {
let source = "my $x = 1;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate_condition(1, "exec('/bin/sh')");
assert!(!result.verified);
assert_eq!(result.reason, Some(ValidationReason::InvalidCondition));
}
// `unlink(...)` is rejected.
#[test]
fn test_condition_unlink_rejected() {
let source = "my $x = 1;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate_condition(1, "unlink('/tmp/foo')");
assert!(!result.verified);
assert_eq!(result.reason, Some(ValidationReason::InvalidCondition));
}
// `eval` followed by a string literal is rejected.
#[test]
fn test_condition_eval_string_rejected() {
let source = "my $x = 1;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate_condition(1, "eval \"dangerous code\"");
assert!(!result.verified);
assert_eq!(result.reason, Some(ValidationReason::InvalidCondition));
}
// A valid condition on a comment line reports the line's reason, not the condition's.
#[test]
fn test_condition_on_invalid_line_rejected() {
let source = "# comment\nmy $x = 1;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate_condition(1, "$x > 0");
assert!(!result.verified);
assert_eq!(result.reason, Some(ValidationReason::CommentLine));
}
// A `defined(...)` call is accepted.
#[test]
fn test_condition_defined_check() {
let source = "my $x = undef;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate_condition(1, "defined($x)");
assert!(result.verified);
}
// Comparisons joined with `&&` are accepted.
#[test]
fn test_condition_logical_operators() {
let source = "my $x = 1;\n";
let validator = must(AstBreakpointValidator::new(source));
let result = validator.validate_condition(1, "$x > 0 && $x < 100");
assert!(result.verified);
}
// --- POD helper functions ---
// `=` followed by a letter is recognised as a directive.
#[test]
fn test_is_pod_directive_basic() {
assert!(AstBreakpointValidator::is_pod_directive("=head1 NAME"));
assert!(AstBreakpointValidator::is_pod_directive("=head2 METHODS"));
assert!(AstBreakpointValidator::is_pod_directive("=pod"));
assert!(AstBreakpointValidator::is_pod_directive("=cut"));
assert!(AstBreakpointValidator::is_pod_directive("=over 4"));
assert!(AstBreakpointValidator::is_pod_directive("=back"));
assert!(AstBreakpointValidator::is_pod_directive("=begin html"));
assert!(AstBreakpointValidator::is_pod_directive("=end html"));
assert!(AstBreakpointValidator::is_pod_directive("=for text"));
assert!(AstBreakpointValidator::is_pod_directive("=encoding utf8"));
assert!(AstBreakpointValidator::is_pod_directive("=item *"));
}
// Code, comments, empty text, `=` plus digit and `==` are not directives.
#[test]
fn test_is_pod_directive_rejects_non_pod() {
assert!(!AstBreakpointValidator::is_pod_directive("my $x = 1;"));
assert!(!AstBreakpointValidator::is_pod_directive("# comment"));
assert!(!AstBreakpointValidator::is_pod_directive(""));
assert!(!AstBreakpointValidator::is_pod_directive("=123"));
assert!(!AstBreakpointValidator::is_pod_directive("=="));
}
// Source without POD yields no regions.
#[test]
fn test_find_pod_regions_empty() {
let regions = AstBreakpointValidator::find_pod_regions("my $x = 1;\n");
assert!(regions.is_empty());
}
// A single region spans from the opening directive through `=cut`.
#[test]
fn test_find_pod_regions_single_section() {
let source = "my $x = 1;\n=head1 NAME\nTest\n=cut\nmy $y = 2;\n";
let regions = AstBreakpointValidator::find_pod_regions(source);
assert_eq!(regions.len(), 1);
let text = &source[regions[0].start..regions[0].end];
assert!(text.starts_with("=head1"));
assert!(text.ends_with("=cut"));
}
// An unclosed region ends at the end of the source.
#[test]
fn test_find_pod_regions_unclosed() {
let source = "my $x = 1;\n=pod\nSome docs\n";
let regions = AstBreakpointValidator::find_pod_regions(source);
assert_eq!(regions.len(), 1);
assert_eq!(regions[0].end, source.len());
}
}