panache_parser/parser/utils/
hashpipe_normalizer.rs1use std::ops::Range;
8
/// Comment prefixes that may introduce a hashpipe option line.
///
/// [`normalize_hashpipe_header`] returns `None` for any prefix that is not
/// listed here.
pub const SUPPORTED_HASHPIPE_PREFIXES: [&str; 3] = ["#|", "//|", "--|"];
11
/// Byte-offset correspondence between one host header line and the matching
/// line of the normalized YAML.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashpipeLineMapping {
    /// Byte range of the whole host line, line terminator included.
    pub host_line_range: Range<usize>,
    /// Byte range of the host text that survives stripping: it starts after
    /// the leading whitespace, the prefix and at most one following space or
    /// tab, and ends before the line terminator.
    pub host_stripped_range: Range<usize>,
    /// Byte range of the stripped text inside the normalized YAML, without
    /// the trailing `\n`.
    pub normalized_content_range: Range<usize>,
    /// Byte range of the line inside the normalized YAML, including the
    /// trailing `\n` when the host line had a terminator.
    pub normalized_line_range: Range<usize>,
    /// Length in bytes of the host line terminator: `0` (none), `1` (`\n`)
    /// or `2` (`\r\n`).
    pub host_newline_len: usize,
}
26
27#[derive(Debug, Clone, PartialEq, Eq)]
29pub struct HashpipeHeaderNormalization {
30 pub prefix: String,
32 pub header_line_count: usize,
34 pub header_byte_span: Range<usize>,
36 pub normalized_yaml: String,
38 pub line_mappings: Vec<HashpipeLineMapping>,
40}
41
/// A single line of the host text together with its byte offsets.
#[derive(Debug, Clone, Copy)]
struct LineSlice<'a> {
    /// Line text with the trailing `\n` / `\r\n` removed.
    line_without_newline: &'a str,
    /// Byte offset of the first byte of the line.
    start: usize,
    /// Byte offset one past the line, line terminator included.
    end: usize,
    /// Length of the line terminator in bytes (`0`, `1` or `2`).
    newline_len: usize,
}
49
50pub fn normalize_hashpipe_header(
55 content: &str,
56 prefix: &str,
57) -> Option<HashpipeHeaderNormalization> {
58 if !SUPPORTED_HASHPIPE_PREFIXES.contains(&prefix) {
59 return None;
60 }
61
62 let lines = split_lines_with_offsets(content);
63 if lines.is_empty() {
64 return None;
65 }
66
67 let mut consumed = 0usize;
68 let mut saw_prefix = false;
69 let mut open_quoted: Option<String> = None;
70 let mut open_block_scalar = false;
71 let mut open_flow_collection = false;
72 let mut open_indented_yaml_value = false;
73
74 while consumed < lines.len() {
75 let line = lines[consumed];
76 let trimmed = line.line_without_newline.trim_start_matches([' ', '\t']);
77
78 if let Some(mut value) = open_quoted.take()
79 && let Some(fragment) = continuation_value(trimmed, prefix)
80 {
81 if !value.ends_with(' ') {
82 value.push(' ');
83 }
84 value.push_str(&fragment);
85 consumed += 1;
86 if is_unclosed_double_quoted(&value) {
87 open_quoted = Some(value);
88 }
89 continue;
90 }
91
92 if open_block_scalar {
93 if let Some(after_prefix) = trimmed.strip_prefix(prefix)
94 && is_block_scalar_continuation_line(after_prefix)
95 {
96 consumed += 1;
97 continue;
98 }
99 open_block_scalar = false;
100 }
101
102 if open_flow_collection {
103 if let Some(after_prefix) = trimmed.strip_prefix(prefix)
104 && is_flow_collection_continuation_line(after_prefix)
105 {
106 consumed += 1;
107 if let Some(value) = option_value(trimmed, prefix)
108 && !is_unclosed_flow_collection(&value)
109 {
110 open_flow_collection = false;
111 }
112 continue;
113 }
114 open_flow_collection = false;
115 }
116
117 if open_indented_yaml_value {
118 if let Some(after_prefix) = trimmed.strip_prefix(prefix)
119 && is_block_scalar_continuation_line(after_prefix)
120 {
121 consumed += 1;
122 continue;
123 }
124 open_indented_yaml_value = false;
125 }
126
127 if is_hashpipe_option_line(trimmed, prefix) {
128 saw_prefix = true;
129 if let Some(value) = option_value(trimmed, prefix) {
130 if is_unclosed_double_quoted(&value) {
131 open_quoted = Some(value);
132 } else if is_yaml_block_scalar_indicator(&value) {
133 open_block_scalar = true;
134 } else if is_unclosed_flow_collection(&value) {
135 open_flow_collection = true;
136 } else if value.is_empty() {
137 open_indented_yaml_value = true;
138 }
139 }
140 consumed += 1;
141 continue;
142 }
143
144 break;
145 }
146
147 if !saw_prefix || consumed == 0 {
148 return None;
149 }
150
151 let header_end = lines[consumed - 1].end;
152 let mut normalized_yaml = String::new();
153 let mut line_mappings = Vec::with_capacity(consumed);
154 let mut normalized_pos = 0usize;
155
156 for line in &lines[..consumed] {
157 let stripped = strip_hashpipe_prefix_once(line.line_without_newline, prefix)?;
158
159 let trimmed_start = line.line_without_newline.trim_start_matches([' ', '\t']);
160 let leading_ws_len = line.line_without_newline.len() - trimmed_start.len();
161 let after_prefix = &trimmed_start[prefix.len()..];
162 let removed_space_len = usize::from(after_prefix.starts_with([' ', '\t']));
163 let host_stripped_start = line.start + leading_ws_len + prefix.len() + removed_space_len;
164 let host_stripped_end = line.start + line.line_without_newline.len();
165
166 let normalized_content_start = normalized_pos;
167 normalized_yaml.push_str(stripped);
168 normalized_pos += stripped.len();
169 if line.newline_len > 0 {
170 normalized_yaml.push('\n');
171 normalized_pos += 1;
172 }
173
174 line_mappings.push(HashpipeLineMapping {
175 host_line_range: line.start..line.end,
176 host_stripped_range: host_stripped_start..host_stripped_end,
177 normalized_content_range: normalized_content_start
178 ..(normalized_content_start + stripped.len()),
179 normalized_line_range: normalized_content_start..normalized_pos,
180 host_newline_len: line.newline_len,
181 });
182 }
183
184 Some(HashpipeHeaderNormalization {
185 prefix: prefix.to_string(),
186 header_line_count: consumed,
187 header_byte_span: 0..header_end,
188 normalized_yaml,
189 line_mappings,
190 })
191}
192
193fn split_lines_with_offsets(content: &str) -> Vec<LineSlice<'_>> {
194 let mut lines = Vec::new();
195 let mut idx = 0usize;
196 let bytes = content.as_bytes();
197
198 while idx < content.len() {
199 let mut end = idx;
200 while end < content.len() && bytes[end] != b'\n' {
201 end += 1;
202 }
203 if end < content.len() {
204 end += 1; }
206
207 let full = &content[idx..end];
208 let newline_len = if full.ends_with("\r\n") {
209 2
210 } else if full.ends_with('\n') {
211 1
212 } else {
213 0
214 };
215 let line_without_newline = &full[..full.len().saturating_sub(newline_len)];
216
217 lines.push(LineSlice {
218 line_without_newline,
219 start: idx,
220 end,
221 newline_len,
222 });
223
224 idx = end;
225 }
226
227 lines
228}
229
/// Removes leading spaces/tabs, the hashpipe `prefix`, and at most one space
/// or tab following it.
///
/// Returns `None` when the line does not start with `prefix` once leading
/// whitespace is skipped.
fn strip_hashpipe_prefix_once<'a>(line_without_newline: &'a str, prefix: &str) -> Option<&'a str> {
    let body = line_without_newline
        .trim_start_matches([' ', '\t'])
        .strip_prefix(prefix)?;
    // Only a single separator character is dropped so that any further
    // indentation of the value is preserved.
    Some(body.strip_prefix([' ', '\t']).unwrap_or(body))
}
241
/// Reports whether the line has the shape `<prefix> key: ...`.
///
/// Leading spaces/tabs before the prefix and between the prefix and the key
/// are ignored. The key is everything up to the first `:` and must not be
/// empty.
fn is_hashpipe_option_line(line_without_newline: &str, prefix: &str) -> bool {
    line_without_newline
        .trim_start_matches([' ', '\t'])
        .strip_prefix(prefix)
        .and_then(|after_prefix| after_prefix.trim_start_matches([' ', '\t']).split_once(':'))
        .is_some_and(|(key, _)| !key.trim_end_matches([' ', '\t']).is_empty())
}
255
256fn option_value(line_without_newline: &str, prefix: &str) -> Option<String> {
257 if !is_hashpipe_option_line(line_without_newline, prefix) {
258 return None;
259 }
260 let trimmed_start = line_without_newline.trim_start_matches([' ', '\t']);
261 let after_prefix = &trimmed_start[prefix.len()..];
262 let rest = after_prefix.trim_start_matches([' ', '\t']);
263 let colon_idx = rest.find(':')?;
264 let value = rest[colon_idx + 1..]
265 .trim_start_matches([' ', '\t'])
266 .trim_end_matches([' ', '\t']);
267 Some(value.to_string())
268}
269
/// Returns the text of a line that continues a multi-line quoted value.
///
/// The line must start with `prefix` (after optional leading spaces/tabs)
/// followed by at least one space or tab. The returned fragment has
/// surrounding spaces and tabs removed; a line whose fragment is empty
/// yields `None`.
fn continuation_value(line_without_newline: &str, prefix: &str) -> Option<String> {
    let after_prefix = line_without_newline
        .trim_start_matches([' ', '\t'])
        .strip_prefix(prefix)?;
    if !after_prefix.starts_with([' ', '\t']) {
        return None;
    }
    let fragment = after_prefix
        .trim_start_matches([' ', '\t'])
        .trim_end_matches([' ', '\t']);
    (!fragment.is_empty()).then(|| fragment.to_string())
}
289
/// Reports whether `value` is a YAML block scalar header.
///
/// A header is `|` or `>` followed by any mix of chomping indicators (`+`,
/// `-`) and ASCII digits. It may be followed by a YAML comment, i.e. a `#`
/// separated from the indicator by at least one space or tab, as in
/// `| # caption`. Surrounding whitespace is ignored. Any other trailing text
/// makes the value an ordinary scalar.
fn is_yaml_block_scalar_indicator(value: &str) -> bool {
    let header = value.trim();

    // YAML allows a comment after the header; it does not change what the
    // header means, so it is dropped before validating the indicator.
    let indicator = match header.split_once([' ', '\t']) {
        None => header,
        Some((head, tail)) if tail.trim_start_matches([' ', '\t']).starts_with('#') => head,
        Some(_) => return false,
    };

    let mut chars = indicator.chars();
    matches!(chars.next(), Some('|' | '>'))
        && chars.all(|ch| matches!(ch, '+' | '-') || ch.is_ascii_digit())
}
304
/// Number of spaces and tabs at the start of `text`.
fn leading_ws_count(text: &str) -> usize {
    // Space and tab are one byte each, so the byte difference equals the
    // character count.
    text.len() - text.trim_start_matches([' ', '\t']).len()
}
308
309fn is_block_scalar_continuation_line(after_prefix: &str) -> bool {
310 let text = after_prefix.trim_end_matches(['\n', '\r']);
311 if text.trim().is_empty() {
312 return true;
313 }
314 leading_ws_count(text) >= 2
315}
316
317fn is_flow_collection_continuation_line(after_prefix: &str) -> bool {
318 if is_block_scalar_continuation_line(after_prefix) {
319 return true;
320 }
321 let trimmed = after_prefix
322 .trim_end_matches(['\n', '\r'])
323 .trim_start_matches([' ', '\t']);
324 trimmed.starts_with(']') || trimmed.starts_with('}')
325}
326
/// Reports whether `value` starts a double-quoted scalar that is not
/// terminated within `value`.
///
/// The scalar ends at the first `"` that is not preceded by a backslash
/// escape. Anything after that closing quote (for example a trailing comment
/// that itself contains quotes) is ignored, so `"done" # the "final one`
/// counts as closed. Values that do not start with `"` are never unclosed.
fn is_unclosed_double_quoted(value: &str) -> bool {
    let Some(body) = value.strip_prefix('"') else {
        return false;
    };

    let mut chars = body.chars();
    while let Some(ch) = chars.next() {
        match ch {
            // A backslash escapes the next character, which therefore cannot
            // close the scalar.
            '\\' => {
                chars.next();
            }
            '"' => return false,
            _ => {}
        }
    }
    true
}
348
/// Reports whether `value` opens a YAML flow collection (`[...]` or `{...}`)
/// that is still open at the end of `value`.
///
/// Brackets inside single- or double-quoted strings are ignored, and a
/// backslash inside a double-quoted string escapes the next character. The
/// result is `false` when `value` does not start with `[` or `{` (after
/// leading whitespace) or when a closing bracket does not match the
/// innermost open one. An unterminated quote also counts as unclosed.
fn is_unclosed_flow_collection(value: &str) -> bool {
    if !value.trim_start().starts_with(['[', '{']) {
        return false;
    }

    // `Some(q)` while inside a string delimited by the quote character `q`.
    let mut quote: Option<char> = None;
    let mut open: Vec<char> = Vec::new();
    let mut chars = value.chars();

    while let Some(ch) = chars.next() {
        match (quote, ch) {
            (Some('"'), '\\') => {
                // Skip the escaped character.
                chars.next();
            }
            (Some(delimiter), _) if ch == delimiter => quote = None,
            (Some(_), _) => {}
            (None, '\'' | '"') => quote = Some(ch),
            (None, '[' | '{') => open.push(ch),
            (None, ']') => {
                if open.pop() != Some('[') {
                    return false;
                }
            }
            (None, '}') => {
                if open.pop() != Some('{') {
                    return false;
                }
            }
            (None, _) => {}
        }
    }

    !open.is_empty() || quote.is_some()
}
386
#[cfg(test)]
mod tests {
    use super::normalize_hashpipe_header;

    /// Every supported prefix produces the same normalized YAML, and the
    /// header stops at the first line without the prefix.
    #[test]
    fn normalizes_supported_prefixes() {
        for prefix in ["#|", "//|", "--|"] {
            let input = format!("{prefix} echo: true\n{prefix} warning: false\nx <- 1\n");
            let normalized = normalize_hashpipe_header(&input, prefix).expect("expected header");
            assert_eq!(normalized.header_line_count, 2);
            assert_eq!(
                normalized.header_byte_span,
                0..(input.lines().take(2).map(|l| l.len() + 1).sum())
            );
            assert_eq!(normalized.normalized_yaml, "echo: true\nwarning: false\n");
            assert_eq!(normalized.line_mappings.len(), 2);
        }
    }

    /// A double-quoted value left open on one line is continued by the next
    /// prefixed line. Only one separator after the prefix is stripped, so the
    /// remaining indentation of the continuation is preserved.
    #[test]
    fn handles_multiline_quoted_value() {
        let input = "#| title: \"hello\n#|   world\"\n#| echo: true\nbody\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 3);
        assert_eq!(
            normalized.normalized_yaml,
            "title: \"hello\n  world\"\necho: true\n"
        );
    }

    /// Continuation lines of flow collections, block scalars and indented
    /// values need at least two spaces/tabs after the prefix to be accepted.
    #[test]
    fn handles_flow_collection_and_block_scalar_and_indented_value() {
        let flow = "#| tags: [a,\n#|   b,\n#|   c]\ncode\n";
        let flow_norm = normalize_hashpipe_header(flow, "#|").expect("expected flow header");
        assert_eq!(flow_norm.header_line_count, 3);
        assert_eq!(flow_norm.normalized_yaml, "tags: [a,\n  b,\n  c]\n");

        let block_scalar = "#| fig-cap: |\n#|   one\n#|   two\n#| echo: true\n";
        let block_norm =
            normalize_hashpipe_header(block_scalar, "#|").expect("expected scalar header");
        assert_eq!(block_norm.header_line_count, 4);
        assert_eq!(
            block_norm.normalized_yaml,
            "fig-cap: |\n  one\n  two\necho: true\n"
        );

        let indented = "#| fig-cap:\n#|   - A\n#|   - B\nplot()\n";
        let indented_norm =
            normalize_hashpipe_header(indented, "#|").expect("expected indented header");
        assert_eq!(indented_norm.header_line_count, 3);
        assert_eq!(indented_norm.normalized_yaml, "fig-cap:\n  - A\n  - B\n");
    }

    /// Only a header at the very top counts; option lines after the first
    /// body line are ignored.
    #[test]
    fn handles_no_header_and_partial_header() {
        assert!(normalize_hashpipe_header("plot(1:3)\n#| echo: true\n", "#|").is_none());

        let input = "#| echo: true\nplot(1:3)\n#| warning: false\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected leading header");
        assert_eq!(normalized.header_line_count, 1);
        assert_eq!(normalized.normalized_yaml, "echo: true\n");
        assert_eq!(normalized.header_byte_span.end, "#| echo: true\n".len());
    }

    /// CRLF terminators are written as `\n` in the normalized YAML, while
    /// the mappings and byte span keep the two-byte host terminator.
    #[test]
    fn handles_crlf_deterministically() {
        let input = "#| echo: true\r\n#| warning: false\r\nbody\r\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 2);
        assert_eq!(normalized.normalized_yaml, "echo: true\nwarning: false\n");
        assert_eq!(normalized.line_mappings[0].host_newline_len, 2);
        assert_eq!(normalized.line_mappings[1].host_newline_len, 2);
        assert_eq!(
            normalized.header_byte_span.end,
            "#| echo: true\r\n#| warning: false\r\n".len()
        );
    }
}