Skip to main content

perl_tokenizer/
util.rs

1//! Utility functions for the Perl parser
2
3/// Find the byte offset of a __DATA__ or __END__ marker in the source text.
4/// Uses the lexer to avoid false positives in heredocs/POD.
5/// Returns the byte offset of the start of the marker, or None if not found.
6pub fn find_data_marker_byte_lexed(s: &str) -> Option<usize> {
7    use perl_lexer::{PerlLexer, TokenType};
8    let mut lx = PerlLexer::new(s);
9    while let Some(tok) = lx.next_token() {
10        match tok.token_type {
11            TokenType::DataMarker(_) => return Some(tok.start),
12            TokenType::EOF => break,
13            _ => {}
14        }
15    }
16    None
17}
18
19/// Helper to get the code portion of text (before __DATA__/__END__)
20pub fn code_slice(text: &str) -> &str {
21    find_data_marker_byte_lexed(text).map(|i| &text[..i]).unwrap_or(text)
22}
23
24/// Find the byte offset of a __DATA__ or __END__ marker in the source text.
25/// Returns the byte offset of the start of the marker line, or None if not found.
26#[deprecated(note = "Use find_data_marker_byte_lexed to avoid false positives in heredocs/POD")]
27pub fn find_data_marker_byte(s: &str) -> Option<usize> {
28    find_data_marker_byte_lexed(s)
29}
30
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks marker detection against a table of (source, expected offset) pairs.
    #[test]
    fn test_find_data_marker_lexed() {
        let cases: [(&str, Option<usize>); 4] = [
            // No marker at all.
            ("print 'hello';\n", None),
            // `__DATA__` on its own line after one statement.
            ("print 'hello';\n__DATA__\ndata here", Some(15)),
            // `__END__` at the start of a line.
            ("code;\n__END__\ndata", Some(6)),
            // Marker text inside a string literal, not at line start: must not match.
            ("print '__DATA__';\n", None),
        ];

        for &(source, expected) in &cases {
            assert_eq!(
                find_data_marker_byte_lexed(source),
                expected,
                "unexpected marker offset for source {:?}",
                source
            );
        }
    }

    /// Checks that `code_slice` keeps only the text preceding the marker.
    #[test]
    fn test_code_slice() {
        let cases: [(&str, &str); 3] = [
            // No marker: the full text comes back.
            ("print 'hello';\n", "print 'hello';\n"),
            // Everything from `__DATA__` onwards is dropped.
            ("print 'hello';\n__DATA__\ndata here", "print 'hello';\n"),
            // Everything from `__END__` onwards is dropped.
            ("code;\n__END__\ndata", "code;\n"),
        ];

        for &(source, expected) in &cases {
            assert_eq!(
                code_slice(source),
                expected,
                "unexpected code slice for source {:?}",
                source
            );
        }
    }
}