panache_parser/parser/utils/
hashpipe_normalizer.rs1use std::ops::Range;
8
/// Comment prefixes accepted by [`normalize_hashpipe_header`]; that function
/// returns `None` for any prefix not listed here.
pub const SUPPORTED_HASHPIPE_PREFIXES: [&str; 3] = ["#|", "//|", "--|"];
11
/// Byte-offset bookkeeping for one header line, relating its position in the
/// host text to its position in the normalized text.
///
/// "Host" ranges index into the `content` passed to
/// [`normalize_hashpipe_header`]; "normalized" ranges index into
/// [`HashpipeHeaderNormalization::normalized_yaml`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashpipeLineMapping {
    /// The whole host line, including its line terminator (if any).
    pub host_line_range: Range<usize>,
    /// The part of the host line that survives stripping: everything after
    /// the leading spaces/tabs, the prefix, and at most one following space
    /// or tab, up to (but excluding) the line terminator.
    pub host_stripped_range: Range<usize>,
    /// The stripped content inside the normalized text, without the newline.
    pub normalized_content_range: Range<usize>,
    /// Same start as `normalized_content_range`, but extended over the `\n`
    /// that is emitted when the host line had a terminator.
    pub normalized_line_range: Range<usize>,
    /// Length in bytes of the host line terminator: 2 for `\r\n`, 1 for `\n`,
    /// 0 when the line ends at end of input.
    pub host_newline_len: usize,
}
26
/// Result of [`normalize_hashpipe_header`]: the header text with the comment
/// prefix removed, plus the data needed to map offsets back to the host text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashpipeHeaderNormalization {
    /// The prefix the header was normalized with (an owned copy of the
    /// `prefix` argument).
    pub prefix: String,
    /// Number of leading host lines that were consumed as header lines.
    pub header_line_count: usize,
    /// Byte span of the header in the host text; it starts at 0 and ends
    /// after the last consumed line, including that line's terminator.
    pub header_byte_span: Range<usize>,
    /// The consumed lines with the prefix stripped; every host line
    /// terminator (`\n` or `\r\n`) is emitted as a single `\n`.
    pub normalized_yaml: String,
    /// One mapping per consumed line, in source order.
    pub line_mappings: Vec<HashpipeLineMapping>,
}
41
/// A single line of the input together with its byte offsets, as produced by
/// `split_lines_with_offsets`.
#[derive(Debug, Clone, Copy)]
struct LineSlice<'a> {
    /// Line text with the trailing `\n` / `\r\n` removed.
    line_without_newline: &'a str,
    /// Byte offset of the first byte of the line in the input.
    start: usize,
    /// Byte offset one past the end of the line, terminator included.
    end: usize,
    /// Terminator length in bytes: 2 (`\r\n`), 1 (`\n`) or 0 (none).
    newline_len: usize,
}
49
50pub fn normalize_hashpipe_header(
55 content: &str,
56 prefix: &str,
57) -> Option<HashpipeHeaderNormalization> {
58 if !SUPPORTED_HASHPIPE_PREFIXES.contains(&prefix) {
59 return None;
60 }
61
62 let lines = split_lines_with_offsets(content);
63 if lines.is_empty() {
64 return None;
65 }
66
67 let mut consumed = 0usize;
68 let mut saw_prefix = false;
69
70 while consumed < lines.len() {
71 let line = lines[consumed];
72 let has_following_prefixed_line = lines
73 .get(consumed + 1)
74 .map(|next| {
75 next.line_without_newline
76 .trim_start_matches([' ', '\t'])
77 .starts_with(prefix)
78 })
79 .unwrap_or(false);
80 if is_hashpipe_option_or_continuation_line(
81 line.line_without_newline,
82 prefix,
83 has_following_prefixed_line,
84 ) {
85 saw_prefix = true;
86 consumed += 1;
87 continue;
88 }
89 break;
90 }
91
92 if !saw_prefix || consumed == 0 {
93 return None;
94 }
95
96 let header_end = lines[consumed - 1].end;
97 let mut normalized_yaml = String::new();
98 let mut line_mappings = Vec::with_capacity(consumed);
99 let mut normalized_pos = 0usize;
100
101 for line in &lines[..consumed] {
102 let stripped = strip_hashpipe_prefix_once(line.line_without_newline, prefix)?;
103
104 let trimmed_start = line.line_without_newline.trim_start_matches([' ', '\t']);
105 let leading_ws_len = line.line_without_newline.len() - trimmed_start.len();
106 let after_prefix = &trimmed_start[prefix.len()..];
107 let removed_space_len = usize::from(after_prefix.starts_with([' ', '\t']));
108 let host_stripped_start = line.start + leading_ws_len + prefix.len() + removed_space_len;
109 let host_stripped_end = line.start + line.line_without_newline.len();
110
111 let normalized_content_start = normalized_pos;
112 normalized_yaml.push_str(stripped);
113 normalized_pos += stripped.len();
114 if line.newline_len > 0 {
115 normalized_yaml.push('\n');
116 normalized_pos += 1;
117 }
118
119 line_mappings.push(HashpipeLineMapping {
120 host_line_range: line.start..line.end,
121 host_stripped_range: host_stripped_start..host_stripped_end,
122 normalized_content_range: normalized_content_start
123 ..(normalized_content_start + stripped.len()),
124 normalized_line_range: normalized_content_start..normalized_pos,
125 host_newline_len: line.newline_len,
126 });
127 }
128
129 Some(HashpipeHeaderNormalization {
130 prefix: prefix.to_string(),
131 header_line_count: consumed,
132 header_byte_span: 0..header_end,
133 normalized_yaml,
134 line_mappings,
135 })
136}
137
138fn split_lines_with_offsets(content: &str) -> Vec<LineSlice<'_>> {
139 let mut lines = Vec::new();
140 let mut idx = 0usize;
141 let bytes = content.as_bytes();
142
143 while idx < content.len() {
144 let mut end = idx;
145 while end < content.len() && bytes[end] != b'\n' {
146 end += 1;
147 }
148 if end < content.len() {
149 end += 1; }
151
152 let full = &content[idx..end];
153 let newline_len = if full.ends_with("\r\n") {
154 2
155 } else if full.ends_with('\n') {
156 1
157 } else {
158 0
159 };
160 let line_without_newline = &full[..full.len().saturating_sub(newline_len)];
161
162 lines.push(LineSlice {
163 line_without_newline,
164 start: idx,
165 end,
166 newline_len,
167 });
168
169 idx = end;
170 }
171
172 lines
173}
174
/// Removes leading spaces/tabs, then `prefix`, then at most one space or tab.
///
/// Any further whitespace after that single separator is kept, so relative
/// indentation of the remaining text is preserved. Returns `None` when the
/// line (after leading spaces/tabs) does not start with `prefix`.
fn strip_hashpipe_prefix_once<'a>(line_without_newline: &'a str, prefix: &str) -> Option<&'a str> {
    let tail = line_without_newline
        .trim_start_matches([' ', '\t'])
        .strip_prefix(prefix)?;

    // Drop exactly one separator character if present; otherwise keep the
    // tail untouched.
    Some(tail.strip_prefix([' ', '\t']).unwrap_or(tail))
}
186
/// Decides whether a line belongs to a hashpipe header.
///
/// After skipping leading spaces/tabs the line must start with `prefix`.
/// Then, looking at the text after the prefix:
///
/// * nothing but spaces/tabs: accepted only if `has_following_prefixed_line`;
/// * contains a `:`: accepted when the text before the first `:` is not
///   blank (an option-like `key: value` line);
/// * otherwise: accepted only when the prefix is directly followed by a space
///   or tab (a continuation line).
fn is_hashpipe_option_or_continuation_line(
    line_without_newline: &str,
    prefix: &str,
    has_following_prefixed_line: bool,
) -> bool {
    let tail = match line_without_newline
        .trim_start_matches([' ', '\t'])
        .strip_prefix(prefix)
    {
        Some(tail) => tail,
        None => return false,
    };

    let payload = tail.trim_start_matches([' ', '\t']);
    if payload.is_empty() {
        return has_following_prefixed_line;
    }

    match payload.split_once(':') {
        Some((key, _)) => !key.trim_end().is_empty(),
        None => tail.starts_with([' ', '\t']),
    }
}
214
#[cfg(test)]
mod tests {
    use super::normalize_hashpipe_header;

    // Fixture convention: exactly one space/tab after the prefix is a
    // separator and gets stripped; any additional whitespace is content
    // indentation and must survive normalization. Continuation lines below
    // therefore use two spaces after the prefix when the expected output is
    // indented by one.

    /// Every supported prefix yields the same normalized text and span.
    #[test]
    fn normalizes_supported_prefixes() {
        for prefix in ["#|", "//|", "--|"] {
            let input = format!("{prefix} echo: true\n{prefix} warning: false\nx <- 1\n");
            let normalized = normalize_hashpipe_header(&input, prefix).expect("expected header");
            assert_eq!(normalized.header_line_count, 2);
            assert_eq!(
                normalized.header_byte_span,
                0..(input.lines().take(2).map(|l| l.len() + 1).sum())
            );
            assert_eq!(normalized.normalized_yaml, "echo: true\nwarning: false\n");
            assert_eq!(normalized.line_mappings.len(), 2);
        }
    }

    /// A quoted value may continue on an indented prefixed line.
    #[test]
    fn handles_multiline_quoted_value() {
        let input = "#| title: \"hello\n#|  world\"\n#| echo: true\nbody\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 3);
        assert_eq!(
            normalized.normalized_yaml,
            "title: \"hello\n world\"\necho: true\n"
        );
    }

    /// Indentation beyond the single separator is preserved for flow
    /// collections, block scalars and indented sequences.
    #[test]
    fn handles_flow_collection_and_block_scalar_and_indented_value() {
        let flow = "#| tags: [a,\n#|  b,\n#|  c]\ncode\n";
        let flow_norm = normalize_hashpipe_header(flow, "#|").expect("expected flow header");
        assert_eq!(flow_norm.header_line_count, 3);
        assert_eq!(flow_norm.normalized_yaml, "tags: [a,\n b,\n c]\n");

        let block_scalar = "#| fig-cap: |\n#|  one\n#|  two\n#| echo: true\n";
        let block_norm =
            normalize_hashpipe_header(block_scalar, "#|").expect("expected scalar header");
        assert_eq!(block_norm.header_line_count, 4);
        assert_eq!(
            block_norm.normalized_yaml,
            "fig-cap: |\n one\n two\necho: true\n"
        );

        let indented = "#| fig-cap:\n#|  - A\n#|  - B\nplot()\n";
        let indented_norm =
            normalize_hashpipe_header(indented, "#|").expect("expected indented header");
        assert_eq!(indented_norm.header_line_count, 3);
        assert_eq!(indented_norm.normalized_yaml, "fig-cap:\n - A\n - B\n");
    }

    /// Prefixed lines without a `key:` shape are still consumed as
    /// continuations, with their relative indentation kept as written.
    #[test]
    fn keeps_contiguous_prefixed_lines_even_when_not_option_shaped() {
        let input = "#| fig-subcap:\n#| - ROC\n#|  - PR Curve\nx <- 1\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 3);
        assert_eq!(
            normalized.normalized_yaml,
            "fig-subcap:\n- ROC\n - PR Curve\n"
        );
    }

    /// The header must start on the first line and stops at the first
    /// non-header line.
    #[test]
    fn handles_no_header_and_partial_header() {
        assert!(normalize_hashpipe_header("plot(1:3)\n#| echo: true\n", "#|").is_none());

        let input = "#| echo: true\nplot(1:3)\n#| warning: false\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected leading header");
        assert_eq!(normalized.header_line_count, 1);
        assert_eq!(normalized.normalized_yaml, "echo: true\n");
        assert_eq!(normalized.header_byte_span.end, "#| echo: true\n".len());
    }

    /// A bare prefix line that is not followed by another prefixed line is
    /// left out of the header.
    #[test]
    fn does_not_consume_standalone_prefix_line() {
        let input = "#| echo: true\n#|\nbody\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 1);
        assert_eq!(normalized.normalized_yaml, "echo: true\n");
        assert_eq!(normalized.header_byte_span.end, "#| echo: true\n".len());
    }

    /// A bare prefix line between continuation lines becomes an empty line.
    #[test]
    fn consumes_standalone_prefix_line_when_followed_by_prefixed_continuation() {
        let input = "#| fig-alt: |\n#|  one\n#|\n#|  two\nplot(1)\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 4);
        assert_eq!(normalized.normalized_yaml, "fig-alt: |\n one\n\n two\n");
        assert_eq!(
            normalized.header_byte_span.end,
            "#| fig-alt: |\n#|  one\n#|\n#|  two\n".len()
        );
    }

    /// CRLF terminators are reported with length 2 on the host side and
    /// emitted as a single `\n` in the normalized text.
    #[test]
    fn handles_crlf_deterministically() {
        let input = "#| echo: true\r\n#| warning: false\r\nbody\r\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 2);
        assert_eq!(normalized.normalized_yaml, "echo: true\nwarning: false\n");
        assert_eq!(normalized.line_mappings[0].host_newline_len, 2);
        assert_eq!(normalized.line_mappings[1].host_newline_len, 2);
        assert_eq!(
            normalized.header_byte_span.end,
            "#| echo: true\r\n#| warning: false\r\n".len()
        );
    }
}