Skip to main content

stryke/
data_section.rs

1//! Split `__DATA__` from program source (line must equal `__DATA__` after trim).
2
/// Returns the prefix of `content` that precedes the first `__END__` / `__DATA__` marker line.
///
/// A line counts as a marker when it equals `__END__` or `__DATA__` after [`str::trim`], so
/// surrounding whitespace (including a trailing `\r`) is ignored. Perl stops compiling at that
/// point; required `.pm` files often keep pod after `__END__`.
///
/// The returned slice borrows from `content` and keeps the line terminator of the last retained
/// line. When no marker line exists, `content` is returned unchanged.
pub fn strip_perl_end_marker(content: &str) -> &str {
    let is_marker = |line: &str| matches!(line.trim(), "__END__" | "__DATA__");

    // Pair each newline-inclusive chunk with the byte offset at which it starts, then stop at
    // the first chunk that is a marker line.
    let cut = content
        .split_inclusive('\n')
        .scan(0usize, |next_start, chunk| {
            let begin = *next_start;
            *next_start += chunk.len();
            Some((begin, chunk))
        })
        .find(|&(_, chunk)| is_marker(chunk))
        .map(|(begin, _)| begin);

    match cut {
        Some(begin) => &content[..begin],
        None => content,
    }
}
16
/// Splits `content` at the first line that equals `__DATA__` once trailing whitespace is ignored.
///
/// Returns `(program_text_before_marker, Some(data bytes after marker))`, or
/// `(content.to_string(), None)` when there is no marker line (the input is then returned
/// verbatim).
///
/// When a marker is found, both halves are rebuilt from [`str::lines`]:
/// * `\r\n` line endings are normalised to `\n`;
/// * program lines are joined with `\n` and carry no trailing newline;
/// * every data line is terminated with `\n`, even if the input lacked a final newline.
///
/// Blank lines are preserved on both sides of the marker: leading blank program lines are kept
/// (so line numbers stay stable), and a data section consisting of a single blank line yields
/// `"\n"` rather than an empty buffer.
pub fn split_data_section(content: &str) -> (String, Option<Vec<u8>>) {
    let mut lines = content.lines();
    let mut program_lines: Vec<&str> = Vec::new();
    let mut found_marker = false;

    // Consume lines up to (and including) the marker; `by_ref` leaves the rest for the data pass.
    for line in lines.by_ref() {
        if line.trim_end() == "__DATA__" {
            found_marker = true;
            break;
        }
        program_lines.push(line);
    }

    if !found_marker {
        return (content.to_string(), None);
    }

    // Terminate each data line individually so that empty lines are not lost.
    let mut data = String::new();
    for line in lines {
        data.push_str(line);
        data.push('\n');
    }

    (program_lines.join("\n"), Some(data.into_bytes()))
}
48
#[cfg(test)]
mod tests {
    use super::{split_data_section, strip_perl_end_marker};

    /// Pod after `__END__` is cut off; the retained text keeps its newline.
    #[test]
    fn strip_end_before_pod() {
        let source = "1;\n__END__\n=pod\n";
        let kept = strip_perl_end_marker(source);
        assert_eq!(kept, "1;\n");
    }

    /// `__DATA__` truncates exactly like `__END__` does.
    #[test]
    fn strip_data_truncates_like_end() {
        let source = "use strict;\n__DATA__\ntrailing\n";
        let kept = strip_perl_end_marker(source);
        assert_eq!(kept, "use strict;\n");
    }

    /// Without a marker the input comes back verbatim and there is no data section.
    #[test]
    fn no_marker_returns_full() {
        let source = "print 1;\n";
        let (program, data) = split_data_section(source);
        assert_eq!(program, source);
        assert_eq!(data, None);
    }

    /// Program text loses its trailing newline; data lines are newline-terminated.
    #[test]
    fn splits_at_data() {
        let (program, data) = split_data_section("p 1;\n__DATA__\na\nb\n");
        assert_eq!(program, "p 1;");
        assert_eq!(data.as_deref(), Some(&b"a\nb\n"[..]));
    }

    /// A file that is only the marker gives an empty program and empty (but present) data.
    #[test]
    fn data_marker_only_yields_empty_program() {
        let (program, data) = split_data_section("__DATA__\n");
        assert!(program.is_empty());
        assert_eq!(data.map(|bytes| bytes.len()), Some(0));
    }

    /// Trailing spaces after `__DATA__` do not prevent it from being recognised.
    #[test]
    fn data_marker_with_trailing_spaces_on_line() {
        let (program, data) = split_data_section("1;\n__DATA__   \nbody\n");
        assert_eq!(program, "1;");
        assert_eq!(data.as_deref(), Some(&b"body\n"[..]));
    }

    /// A final data line without a newline still ends up newline-terminated.
    #[test]
    fn no_newline_after_last_program_line_before_marker() {
        let (program, data) = split_data_section("print\n__DATA__\nx");
        assert_eq!(program, "print");
        assert_eq!(data.as_deref(), Some(&b"x\n"[..]));
    }
}