Skip to main content

panache_parser/parser/utils/
hashpipe_normalizer.rs

1//! Shared hashpipe header detection and normalization utilities.
2//!
3//! This module detects the contiguous hashpipe YAML preamble at the start of a
4//! code block content string, strips line prefixes into normalized YAML text,
5//! and records deterministic host↔normalized range mappings.
6
7use std::ops::Range;
8
/// Prefix markers explicitly supported by hashpipe normalization.
///
/// [`normalize_hashpipe_header`] returns `None` for any prefix that is not an
/// exact member of this list.
pub const SUPPORTED_HASHPIPE_PREFIXES: [&str; 3] = ["#|", "//|", "--|"];
11
/// Offsets tying one header line of the host content to its counterpart in
/// the normalized YAML text.
///
/// Every range is a byte range: the `host_*` ranges index the original
/// content, the `normalized_*` ranges index the normalized YAML string.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HashpipeLineMapping {
    /// The whole host line, trailing newline bytes included when it has any.
    pub host_line_range: Range<usize>,
    /// Host line content that survives stripping; trailing newline bytes excluded.
    pub host_stripped_range: Range<usize>,
    /// The normalized line content, excluding the normalized newline byte.
    pub normalized_content_range: Range<usize>,
    /// The normalized line, including the normalized newline byte when present.
    pub normalized_line_range: Range<usize>,
    /// Count of newline bytes ending the host line: 0 (none), 1 (LF) or 2 (CRLF).
    pub host_newline_len: usize,
}
26
27/// Result of hashpipe header detection and stripping.
28#[derive(Debug, Clone, PartialEq, Eq)]
29pub struct HashpipeHeaderNormalization {
30    /// Prefix that was used for detection and stripping.
31    pub prefix: String,
32    /// Number of contiguous hashpipe header lines consumed from the start.
33    pub header_line_count: usize,
34    /// Byte span of the detected header in host content.
35    pub header_byte_span: Range<usize>,
36    /// Prefix-stripped YAML text with deterministic `\n` newlines.
37    pub normalized_yaml: String,
38    /// Per-line host↔normalized mapping metadata.
39    pub line_mappings: Vec<HashpipeLineMapping>,
40}
41
/// One physical line of the host content together with its byte offsets.
#[derive(Clone, Copy, Debug)]
struct LineSlice<'src> {
    /// Line text with the trailing `\n` / `\r\n` removed.
    line_without_newline: &'src str,
    /// Byte offset of the line's first byte within the host content.
    start: usize,
    /// Byte offset one past the line's last byte, newline bytes included.
    end: usize,
    /// Count of newline bytes ending the line: 0 (none), 1 (LF) or 2 (CRLF).
    newline_len: usize,
}
49
50/// Normalize a contiguous leading hashpipe header into YAML text.
51///
52/// Returns `None` when the input does not start with a hashpipe-prefixed line
53/// for the provided prefix.
54pub fn normalize_hashpipe_header(
55    content: &str,
56    prefix: &str,
57) -> Option<HashpipeHeaderNormalization> {
58    if !SUPPORTED_HASHPIPE_PREFIXES.contains(&prefix) {
59        return None;
60    }
61
62    let lines = split_lines_with_offsets(content);
63    if lines.is_empty() {
64        return None;
65    }
66
67    let mut consumed = 0usize;
68    let mut saw_prefix = false;
69
70    while consumed < lines.len() {
71        let line = lines[consumed];
72        let trimmed = line.line_without_newline.trim_start_matches([' ', '\t']);
73
74        if trimmed.starts_with(prefix) {
75            saw_prefix = true;
76            consumed += 1;
77            continue;
78        }
79
80        break;
81    }
82
83    if !saw_prefix || consumed == 0 {
84        return None;
85    }
86
87    let header_end = lines[consumed - 1].end;
88    let mut normalized_yaml = String::new();
89    let mut line_mappings = Vec::with_capacity(consumed);
90    let mut normalized_pos = 0usize;
91
92    for line in &lines[..consumed] {
93        let stripped = strip_hashpipe_prefix_once(line.line_without_newline, prefix)?;
94
95        let trimmed_start = line.line_without_newline.trim_start_matches([' ', '\t']);
96        let leading_ws_len = line.line_without_newline.len() - trimmed_start.len();
97        let after_prefix = &trimmed_start[prefix.len()..];
98        let removed_space_len = usize::from(after_prefix.starts_with([' ', '\t']));
99        let host_stripped_start = line.start + leading_ws_len + prefix.len() + removed_space_len;
100        let host_stripped_end = line.start + line.line_without_newline.len();
101
102        let normalized_content_start = normalized_pos;
103        normalized_yaml.push_str(stripped);
104        normalized_pos += stripped.len();
105        if line.newline_len > 0 {
106            normalized_yaml.push('\n');
107            normalized_pos += 1;
108        }
109
110        line_mappings.push(HashpipeLineMapping {
111            host_line_range: line.start..line.end,
112            host_stripped_range: host_stripped_start..host_stripped_end,
113            normalized_content_range: normalized_content_start
114                ..(normalized_content_start + stripped.len()),
115            normalized_line_range: normalized_content_start..normalized_pos,
116            host_newline_len: line.newline_len,
117        });
118    }
119
120    Some(HashpipeHeaderNormalization {
121        prefix: prefix.to_string(),
122        header_line_count: consumed,
123        header_byte_span: 0..header_end,
124        normalized_yaml,
125        line_mappings,
126    })
127}
128
129fn split_lines_with_offsets(content: &str) -> Vec<LineSlice<'_>> {
130    let mut lines = Vec::new();
131    let mut idx = 0usize;
132    let bytes = content.as_bytes();
133
134    while idx < content.len() {
135        let mut end = idx;
136        while end < content.len() && bytes[end] != b'\n' {
137            end += 1;
138        }
139        if end < content.len() {
140            end += 1; // include '\n'
141        }
142
143        let full = &content[idx..end];
144        let newline_len = if full.ends_with("\r\n") {
145            2
146        } else if full.ends_with('\n') {
147            1
148        } else {
149            0
150        };
151        let line_without_newline = &full[..full.len().saturating_sub(newline_len)];
152
153        lines.push(LineSlice {
154            line_without_newline,
155            start: idx,
156            end,
157            newline_len,
158        });
159
160        idx = end;
161    }
162
163    lines
164}
165
/// Remove one hashpipe marker from the front of a line.
///
/// Leading spaces/tabs are skipped, then `prefix` must follow; otherwise the
/// result is `None`. After the prefix at most one space or tab is dropped, so
/// any further indentation of the YAML payload is preserved.
fn strip_hashpipe_prefix_once<'a>(line_without_newline: &'a str, prefix: &str) -> Option<&'a str> {
    let payload = line_without_newline
        .trim_start_matches([' ', '\t'])
        .strip_prefix(prefix)?;

    // Only a single separator character belongs to the marker itself.
    Some(payload.strip_prefix([' ', '\t']).unwrap_or(payload))
}
177
#[cfg(test)]
mod tests {
    use super::normalize_hashpipe_header;

    /// Every supported prefix yields the same normalized YAML and span.
    #[test]
    fn normalizes_supported_prefixes() {
        for prefix in ["#|", "//|", "--|"] {
            let input = format!("{prefix} echo: true\n{prefix} warning: false\nx <- 1\n");
            let normalized = normalize_hashpipe_header(&input, prefix).expect("expected header");
            assert_eq!(normalized.header_line_count, 2);
            assert_eq!(
                normalized.header_byte_span,
                0..(input.lines().take(2).map(|l| l.len() + 1).sum())
            );
            assert_eq!(normalized.normalized_yaml, "echo: true\nwarning: false\n");
            assert_eq!(normalized.line_mappings.len(), 2);
        }
    }

    /// Unsupported or empty prefixes and empty content never produce a header.
    #[test]
    fn rejects_unsupported_prefix_and_empty_input() {
        assert!(normalize_hashpipe_header("%| echo: true\n", "%|").is_none());
        assert!(normalize_hashpipe_header("#| echo: true\n", "").is_none());
        assert!(normalize_hashpipe_header("", "#|").is_none());
    }

    #[test]
    fn handles_multiline_quoted_value() {
        let input = "#| title: \"hello\n#|   world\"\n#| echo: true\nbody\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 3);
        assert_eq!(
            normalized.normalized_yaml,
            "title: \"hello\n  world\"\necho: true\n"
        );
    }

    #[test]
    fn handles_flow_collection_and_block_scalar_and_indented_value() {
        let flow = "#| tags: [a,\n#|   b,\n#|   c]\ncode\n";
        let flow_norm = normalize_hashpipe_header(flow, "#|").expect("expected flow header");
        assert_eq!(flow_norm.header_line_count, 3);
        assert_eq!(flow_norm.normalized_yaml, "tags: [a,\n  b,\n  c]\n");

        let block_scalar = "#| fig-cap: |\n#|   one\n#|   two\n#| echo: true\n";
        let block_norm =
            normalize_hashpipe_header(block_scalar, "#|").expect("expected scalar header");
        assert_eq!(block_norm.header_line_count, 4);
        assert_eq!(
            block_norm.normalized_yaml,
            "fig-cap: |\n  one\n  two\necho: true\n"
        );

        let indented = "#| fig-cap:\n#|   - A\n#|   - B\nplot()\n";
        let indented_norm =
            normalize_hashpipe_header(indented, "#|").expect("expected indented header");
        assert_eq!(indented_norm.header_line_count, 3);
        assert_eq!(indented_norm.normalized_yaml, "fig-cap:\n  - A\n  - B\n");
    }

    #[test]
    fn keeps_contiguous_prefixed_lines_even_when_not_option_shaped() {
        let input = "#| fig-subcap:\n#| - ROC\n#|  - PR Curve\nx <- 1\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 3);
        assert_eq!(
            normalized.normalized_yaml,
            "fig-subcap:\n- ROC\n - PR Curve\n"
        );
    }

    #[test]
    fn handles_no_header_and_partial_header() {
        assert!(normalize_hashpipe_header("plot(1:3)\n#| echo: true\n", "#|").is_none());

        let input = "#| echo: true\nplot(1:3)\n#| warning: false\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected leading header");
        assert_eq!(normalized.header_line_count, 1);
        assert_eq!(normalized.normalized_yaml, "echo: true\n");
        assert_eq!(normalized.header_byte_span.end, "#| echo: true\n".len());
    }

    #[test]
    fn handles_crlf_deterministically() {
        let input = "#| echo: true\r\n#|  warning: false\r\nbody\r\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 2);
        assert_eq!(normalized.normalized_yaml, "echo: true\n warning: false\n");
        assert_eq!(normalized.line_mappings[0].host_newline_len, 2);
        assert_eq!(normalized.line_mappings[1].host_newline_len, 2);
        assert_eq!(
            normalized.header_byte_span.end,
            "#| echo: true\r\n#|  warning: false\r\n".len()
        );
    }

    /// Pins the exact host and normalized byte ranges, including an indented
    /// marker, a tab separator and a CRLF line ending.
    #[test]
    fn records_host_and_normalized_ranges() {
        let input = "#| echo: true\n  #|\twarning: false\r\nx <- 1\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_byte_span, 0..35);
        assert_eq!(normalized.normalized_yaml, "echo: true\nwarning: false\n");

        let first = &normalized.line_mappings[0];
        assert_eq!(first.host_line_range, 0..14);
        assert_eq!(first.host_stripped_range, 3..13);
        assert_eq!(first.normalized_content_range, 0..10);
        assert_eq!(first.normalized_line_range, 0..11);
        assert_eq!(first.host_newline_len, 1);

        let second = &normalized.line_mappings[1];
        assert_eq!(second.host_line_range, 14..35);
        assert_eq!(second.host_stripped_range, 19..33);
        assert_eq!(second.normalized_content_range, 11..25);
        assert_eq!(second.normalized_line_range, 11..26);
        assert_eq!(second.host_newline_len, 2);

        // Both sides of every mapping must address the same text.
        for mapping in &normalized.line_mappings {
            assert_eq!(
                &input[mapping.host_stripped_range.clone()],
                &normalized.normalized_yaml[mapping.normalized_content_range.clone()]
            );
        }
    }

    /// A final header line without a newline stays unterminated when normalized.
    #[test]
    fn handles_header_without_trailing_newline() {
        let normalized = normalize_hashpipe_header("#| a: 1", "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 1);
        assert_eq!(normalized.header_byte_span, 0..7);
        assert_eq!(normalized.normalized_yaml, "a: 1");

        let only = &normalized.line_mappings[0];
        assert_eq!(only.host_stripped_range, 3..7);
        assert_eq!(only.normalized_content_range, 0..4);
        assert_eq!(only.normalized_line_range, 0..4);
        assert_eq!(only.host_newline_len, 0);
    }
}