perl_lexer/tokenizer/
util.rs1pub fn find_data_marker_byte_lexed(s: &str) -> Option<usize> {
12 const MARKERS: [&str; 2] = ["__DATA__", "__END__"];
14 if !MARKERS.iter().any(|marker| s.contains(marker)) {
15 return None;
16 }
17
18 use crate::{PerlLexer, TokenType};
19 let mut lx = PerlLexer::new(s);
20 while let Some(tok) = lx.next_token() {
21 match tok.token_type {
22 TokenType::DataMarker(_) => return Some(tok.start),
23 TokenType::EOF => break,
24 _ => {}
25 }
26 }
27 None
28}
29
30pub fn code_slice(text: &str) -> &str {
32 split_code_and_data(text).0
33}
34
35pub fn split_code_and_data(text: &str) -> (&str, Option<&str>) {
40 if let Some(marker_start) = find_data_marker_byte_lexed(text) {
41 (&text[..marker_start], Some(&text[marker_start..]))
42 } else {
43 (text, None)
44 }
45}
46
47#[deprecated(note = "Use find_data_marker_byte_lexed to avoid false positives in heredocs/POD")]
50pub fn find_data_marker_byte(s: &str) -> Option<usize> {
51 find_data_marker_byte_lexed(s)
52}
53
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain code, both marker spellings, and a marker quoted inside a string.
    #[test]
    fn test_find_data_marker_lexed() {
        let cases: [(&str, Option<usize>); 4] = [
            ("print 'hello';\n", None),
            ("print 'hello';\n__DATA__\ndata here", Some(15)),
            ("code;\n__END__\ndata", Some(6)),
            ("print '__DATA__';\n", None),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(find_data_marker_byte_lexed(input), expected, "input: {:?}", input);
        }
    }

    /// `code_slice` keeps everything up to (not including) the marker.
    #[test]
    fn test_code_slice() {
        let cases: [(&str, &str); 3] = [
            ("print 'hello';\n", "print 'hello';\n"),
            ("print 'hello';\n__DATA__\ndata here", "print 'hello';\n"),
            ("code;\n__END__\ndata", "code;\n"),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(code_slice(input), expected, "input: {:?}", input);
        }
    }

    /// With two markers present, the split happens at the earlier one.
    #[test]
    fn test_split_code_and_data_prefers_first_marker() {
        let (code, data) = split_code_and_data("print 'a';\n__DATA__\none\n__END__\ntwo");
        assert_eq!(code, "print 'a';\n");
        assert_eq!(data, Some("__DATA__\none\n__END__\ntwo"));
    }

    /// Marker text inside a single-quoted heredoc body or a POD block is not a marker.
    #[test]
    fn test_find_data_marker_ignores_markers_inside_heredoc_and_pod() {
        let inputs = [
            "my $x = <<'TXT';\n__DATA__\nTXT\nprint $x;\n",
            "=pod\n__END__\n=cut\nprint 'ok';\n",
        ];

        for &input in inputs.iter() {
            assert_eq!(find_data_marker_byte_lexed(input), None, "input: {:?}", input);
        }
    }

    /// No marker, `__DATA__`, and `__END__` each split as expected.
    #[test]
    fn test_split_code_and_data() {
        let no_marker = "print 'hello';\n";
        let cases: [(&str, (&str, Option<&str>)); 3] = [
            (no_marker, (no_marker, None)),
            ("print 'hello';\n__DATA__\nvalue", ("print 'hello';\n", Some("__DATA__\nvalue"))),
            ("code;\n__END__\nvalue", ("code;\n", Some("__END__\nvalue"))),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(split_code_and_data(input), expected, "input: {:?}", input);
        }
    }

    /// Marker text under a `=head1` POD block or in a double-quoted heredoc is ignored.
    #[test]
    fn test_find_data_marker_ignores_pod_and_heredoc_content() {
        let inputs = [
            "=head1 NAME\n__DATA__\n=cut\nprint 'done';\n",
            "my $text = <<\"TXT\";\n__END__\nTXT\nprint $text;\n",
        ];

        for &input in inputs.iter() {
            assert_eq!(find_data_marker_byte_lexed(input), None, "input: {:?}", input);
        }
    }

    /// A later `__END__` stays inside the data half once `__DATA__` has been seen.
    #[test]
    fn test_split_code_and_data_prefers_first_lexed_marker() {
        let (code, data) =
            split_code_and_data("print 'prelude';\n__DATA__\nchunk\n__END__\nignored");
        assert_eq!(code, "print 'prelude';\n");
        assert_eq!(data, Some("__DATA__\nchunk\n__END__\nignored"));
    }

    /// The deprecated entry point must agree with the function it forwards to.
    #[test]
    // The deprecated function is called on purpose: it is the subject of this test.
    #[allow(deprecated)]
    fn test_find_data_marker_deprecated_matches_lexed_helper() {
        let src = "say 1;\n__END__\ntrailer";
        let via_deprecated = find_data_marker_byte(src);
        let via_lexed = find_data_marker_byte_lexed(src);
        assert_eq!(via_deprecated, via_lexed);
    }
}