Skip to main content

panache_parser/parser/utils/
hashpipe_normalizer.rs

1//! Shared hashpipe header detection and normalization utilities.
2//!
3//! This module detects the contiguous hashpipe YAML preamble at the start of a
4//! code block content string, strips line prefixes into normalized YAML text,
5//! and records deterministic host↔normalized range mappings.
6
7use std::ops::Range;
8
/// Comment-style markers that may introduce a hashpipe option line.
///
/// `normalize_hashpipe_header` returns `None` for any prefix that is not
/// listed here.
pub const SUPPORTED_HASHPIPE_PREFIXES: [&str; 3] = ["#|", "//|", "--|"];
11
/// Byte-offset bookkeeping that ties one header line in the host content to
/// the text it contributed to the normalized YAML.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashpipeLineMapping {
    /// Span of the whole host line, newline bytes included when present.
    pub host_line_range: Range<usize>,
    /// Span of the host text that survives stripping: it begins after the
    /// leading indentation, the prefix and at most one separator space/tab,
    /// and stops before the newline bytes.
    pub host_stripped_range: Range<usize>,
    /// Span of this line's text inside the normalized YAML, excluding the
    /// `\n` terminator.
    pub normalized_content_range: Range<usize>,
    /// Span of this line inside the normalized YAML, extended over the `\n`
    /// terminator when the host line ended with a newline.
    pub normalized_line_range: Range<usize>,
    /// Number of newline bytes on the host line: 0 (none), 1 (LF) or 2 (CRLF).
    pub host_newline_len: usize,
}
26
27/// Result of hashpipe header detection and stripping.
28#[derive(Debug, Clone, PartialEq, Eq)]
29pub struct HashpipeHeaderNormalization {
30    /// Prefix that was used for detection and stripping.
31    pub prefix: String,
32    /// Number of contiguous hashpipe header lines consumed from the start.
33    pub header_line_count: usize,
34    /// Byte span of the detected header in host content.
35    pub header_byte_span: Range<usize>,
36    /// Prefix-stripped YAML text with deterministic `\n` newlines.
37    pub normalized_yaml: String,
38    /// Per-line host↔normalized mapping metadata.
39    pub line_mappings: Vec<HashpipeLineMapping>,
40}
41
/// One physical line of the host content plus its byte offsets.
#[derive(Debug, Clone, Copy)]
struct LineSlice<'a> {
    /// Line text with the trailing `\n` / `\r\n` removed.
    line_without_newline: &'a str,
    /// Byte offset of the first byte of the line in the host content.
    start: usize,
    /// Byte offset one past the end of the line, newline bytes included.
    end: usize,
    /// Length of the line terminator in bytes: 0, 1 (LF) or 2 (CRLF).
    newline_len: usize,
}
49
50/// Normalize a contiguous leading hashpipe header into YAML text.
51///
52/// Returns `None` when the input does not start with a hashpipe-prefixed line
53/// for the provided prefix.
54pub fn normalize_hashpipe_header(
55    content: &str,
56    prefix: &str,
57) -> Option<HashpipeHeaderNormalization> {
58    if !SUPPORTED_HASHPIPE_PREFIXES.contains(&prefix) {
59        return None;
60    }
61
62    let lines = split_lines_with_offsets(content);
63    if lines.is_empty() {
64        return None;
65    }
66
67    let mut consumed = 0usize;
68    let mut saw_prefix = false;
69
70    while consumed < lines.len() {
71        let line = lines[consumed];
72        let has_following_prefixed_line = lines
73            .get(consumed + 1)
74            .map(|next| {
75                next.line_without_newline
76                    .trim_start_matches([' ', '\t'])
77                    .starts_with(prefix)
78            })
79            .unwrap_or(false);
80        if is_hashpipe_option_or_continuation_line(
81            line.line_without_newline,
82            prefix,
83            has_following_prefixed_line,
84        ) {
85            saw_prefix = true;
86            consumed += 1;
87            continue;
88        }
89        break;
90    }
91
92    if !saw_prefix || consumed == 0 {
93        return None;
94    }
95
96    let header_end = lines[consumed - 1].end;
97    let mut normalized_yaml = String::new();
98    let mut line_mappings = Vec::with_capacity(consumed);
99    let mut normalized_pos = 0usize;
100
101    for line in &lines[..consumed] {
102        let stripped = strip_hashpipe_prefix_once(line.line_without_newline, prefix)?;
103
104        let trimmed_start = line.line_without_newline.trim_start_matches([' ', '\t']);
105        let leading_ws_len = line.line_without_newline.len() - trimmed_start.len();
106        let after_prefix = &trimmed_start[prefix.len()..];
107        let removed_space_len = usize::from(after_prefix.starts_with([' ', '\t']));
108        let host_stripped_start = line.start + leading_ws_len + prefix.len() + removed_space_len;
109        let host_stripped_end = line.start + line.line_without_newline.len();
110
111        let normalized_content_start = normalized_pos;
112        normalized_yaml.push_str(stripped);
113        normalized_pos += stripped.len();
114        if line.newline_len > 0 {
115            normalized_yaml.push('\n');
116            normalized_pos += 1;
117        }
118
119        line_mappings.push(HashpipeLineMapping {
120            host_line_range: line.start..line.end,
121            host_stripped_range: host_stripped_start..host_stripped_end,
122            normalized_content_range: normalized_content_start
123                ..(normalized_content_start + stripped.len()),
124            normalized_line_range: normalized_content_start..normalized_pos,
125            host_newline_len: line.newline_len,
126        });
127    }
128
129    Some(HashpipeHeaderNormalization {
130        prefix: prefix.to_string(),
131        header_line_count: consumed,
132        header_byte_span: 0..header_end,
133        normalized_yaml,
134        line_mappings,
135    })
136}
137
138fn split_lines_with_offsets(content: &str) -> Vec<LineSlice<'_>> {
139    let mut lines = Vec::new();
140    let mut idx = 0usize;
141    let bytes = content.as_bytes();
142
143    while idx < content.len() {
144        let mut end = idx;
145        while end < content.len() && bytes[end] != b'\n' {
146            end += 1;
147        }
148        if end < content.len() {
149            end += 1; // include '\n'
150        }
151
152        let full = &content[idx..end];
153        let newline_len = if full.ends_with("\r\n") {
154            2
155        } else if full.ends_with('\n') {
156            1
157        } else {
158            0
159        };
160        let line_without_newline = &full[..full.len().saturating_sub(newline_len)];
161
162        lines.push(LineSlice {
163            line_without_newline,
164            start: idx,
165            end,
166            newline_len,
167        });
168
169        idx = end;
170    }
171
172    lines
173}
174
/// Remove the hashpipe marker from the front of a single line.
///
/// Leading spaces and tabs are skipped, then `prefix` must follow; otherwise
/// `None` is returned. After the prefix, exactly one space or tab is dropped
/// if present, and everything after that is returned untouched.
fn strip_hashpipe_prefix_once<'a>(line_without_newline: &'a str, prefix: &str) -> Option<&'a str> {
    let payload = line_without_newline
        .trim_start_matches([' ', '\t'])
        .strip_prefix(prefix)?;
    // Only a single separator character is removed so YAML indentation survives.
    Some(payload.strip_prefix([' ', '\t']).unwrap_or(payload))
}
186
/// Decide whether a line belongs to a hashpipe header.
///
/// The line must start with `prefix` once leading spaces/tabs are skipped.
/// Beyond that:
/// - a line with nothing but spaces/tabs after the prefix is accepted only
///   when `has_following_prefixed_line` is true;
/// - a line containing `:` is accepted when the text before the first `:` is
///   non-empty after trimming trailing whitespace;
/// - any other line is accepted only when a space or tab directly follows the
///   prefix.
fn is_hashpipe_option_or_continuation_line(
    line_without_newline: &str,
    prefix: &str,
    has_following_prefixed_line: bool,
) -> bool {
    let after_prefix = match line_without_newline
        .trim_start_matches([' ', '\t'])
        .strip_prefix(prefix)
    {
        Some(tail) => tail,
        None => return false,
    };

    let payload = after_prefix.trim_start_matches([' ', '\t']);
    if payload.is_empty() {
        return has_following_prefixed_line;
    }

    match payload.split_once(':') {
        // `key: value` shape: only the key has to be present.
        Some((key, _)) => !key.trim_end().is_empty(),
        // No colon: treat as a continuation only if it is separated from the prefix.
        None => after_prefix.starts_with([' ', '\t']),
    }
}
214
#[cfg(test)]
mod tests {
    use super::{HashpipeLineMapping, normalize_hashpipe_header};

    /// Every supported prefix yields the same normalized YAML and header span.
    #[test]
    fn normalizes_supported_prefixes() {
        for prefix in ["#|", "//|", "--|"] {
            let input = format!("{prefix} echo: true\n{prefix} warning: false\nx <- 1\n");
            let normalized = normalize_hashpipe_header(&input, prefix).expect("expected header");
            let expected_end: usize = input.lines().take(2).map(|l| l.len() + 1).sum();
            assert_eq!(normalized.header_line_count, 2);
            assert_eq!(normalized.header_byte_span, 0..expected_end);
            assert_eq!(normalized.normalized_yaml, "echo: true\nwarning: false\n");
            assert_eq!(normalized.line_mappings.len(), 2);
        }
    }

    /// Prefixes outside the supported set, empty content, and content written
    /// with a different prefix than requested all produce no header.
    #[test]
    fn rejects_unsupported_prefix_and_empty_input() {
        assert!(normalize_hashpipe_header("%| echo: true\n", "%|").is_none());
        assert!(normalize_hashpipe_header("", "#|").is_none());
        assert!(normalize_hashpipe_header("//| echo: true\n", "#|").is_none());
    }

    /// Pins the exact byte ranges recorded for each header line, including a
    /// line with leading indentation, a tab separator and no trailing newline.
    #[test]
    fn records_line_mappings() {
        let input = "#| echo: true\n  #|\twarning: false";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");

        assert_eq!(normalized.prefix, "#|");
        assert_eq!(normalized.header_line_count, 2);
        assert_eq!(normalized.header_byte_span, 0..input.len());
        assert_eq!(normalized.normalized_yaml, "echo: true\nwarning: false");
        assert_eq!(
            normalized.line_mappings,
            vec![
                HashpipeLineMapping {
                    host_line_range: 0..14,
                    host_stripped_range: 3..13,
                    normalized_content_range: 0..10,
                    normalized_line_range: 0..11,
                    host_newline_len: 1,
                },
                HashpipeLineMapping {
                    host_line_range: 14..33,
                    host_stripped_range: 19..33,
                    normalized_content_range: 11..25,
                    normalized_line_range: 11..25,
                    host_newline_len: 0,
                },
            ]
        );

        // The stripped host text and the normalized text must be the same bytes.
        for mapping in &normalized.line_mappings {
            assert_eq!(
                &input[mapping.host_stripped_range.clone()],
                &normalized.normalized_yaml[mapping.normalized_content_range.clone()]
            );
        }
    }

    #[test]
    fn handles_multiline_quoted_value() {
        let input = "#| title: \"hello\n#|   world\"\n#| echo: true\nbody\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 3);
        assert_eq!(
            normalized.normalized_yaml,
            "title: \"hello\n  world\"\necho: true\n"
        );
    }

    #[test]
    fn handles_flow_collection_and_block_scalar_and_indented_value() {
        let flow = "#| tags: [a,\n#|   b,\n#|   c]\ncode\n";
        let flow_norm = normalize_hashpipe_header(flow, "#|").expect("expected flow header");
        assert_eq!(flow_norm.header_line_count, 3);
        assert_eq!(flow_norm.normalized_yaml, "tags: [a,\n  b,\n  c]\n");

        let block_scalar = "#| fig-cap: |\n#|   one\n#|   two\n#| echo: true\n";
        let block_norm =
            normalize_hashpipe_header(block_scalar, "#|").expect("expected scalar header");
        assert_eq!(block_norm.header_line_count, 4);
        assert_eq!(
            block_norm.normalized_yaml,
            "fig-cap: |\n  one\n  two\necho: true\n"
        );

        let indented = "#| fig-cap:\n#|   - A\n#|   - B\nplot()\n";
        let indented_norm =
            normalize_hashpipe_header(indented, "#|").expect("expected indented header");
        assert_eq!(indented_norm.header_line_count, 3);
        assert_eq!(indented_norm.normalized_yaml, "fig-cap:\n  - A\n  - B\n");
    }

    #[test]
    fn keeps_contiguous_prefixed_lines_even_when_not_option_shaped() {
        let input = "#| fig-subcap:\n#| - ROC\n#|  - PR Curve\nx <- 1\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 3);
        assert_eq!(
            normalized.normalized_yaml,
            "fig-subcap:\n- ROC\n - PR Curve\n"
        );
    }

    #[test]
    fn handles_no_header_and_partial_header() {
        assert!(normalize_hashpipe_header("plot(1:3)\n#| echo: true\n", "#|").is_none());

        let input = "#| echo: true\nplot(1:3)\n#| warning: false\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected leading header");
        assert_eq!(normalized.header_line_count, 1);
        assert_eq!(normalized.normalized_yaml, "echo: true\n");
        assert_eq!(normalized.header_byte_span.end, "#| echo: true\n".len());
    }

    #[test]
    fn does_not_consume_standalone_prefix_line() {
        let input = "#| echo: true\n#|\nbody\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 1);
        assert_eq!(normalized.normalized_yaml, "echo: true\n");
        assert_eq!(normalized.header_byte_span.end, "#| echo: true\n".len());
    }

    #[test]
    fn consumes_standalone_prefix_line_when_followed_by_prefixed_continuation() {
        let input = "#| fig-alt: |\n#|   one\n#|\n#|   two\nplot(1)\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 4);
        assert_eq!(normalized.normalized_yaml, "fig-alt: |\n  one\n\n  two\n");
        assert_eq!(
            normalized.header_byte_span.end,
            "#| fig-alt: |\n#|   one\n#|\n#|   two\n".len()
        );
    }

    #[test]
    fn handles_crlf_deterministically() {
        let input = "#| echo: true\r\n#|  warning: false\r\nbody\r\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 2);
        assert_eq!(normalized.normalized_yaml, "echo: true\n warning: false\n");
        assert_eq!(normalized.line_mappings[0].host_newline_len, 2);
        assert_eq!(normalized.line_mappings[1].host_newline_len, 2);
        assert_eq!(
            normalized.header_byte_span.end,
            "#| echo: true\r\n#|  warning: false\r\n".len()
        );
    }
}