panache_parser/parser/utils/
hashpipe_normalizer.rs1use std::ops::Range;
8
/// Line prefixes that [`normalize_hashpipe_header`] accepts.
///
/// Any prefix that is not listed here makes the normalizer return `None`
/// without inspecting the content.
pub const SUPPORTED_HASHPIPE_PREFIXES: [&str; 3] = ["#|", "//|", "--|"];
11
/// Byte-range correspondence between one header line in the host text and its
/// normalized counterpart.
///
/// "Host" offsets index into the `content` string handed to
/// [`normalize_hashpipe_header`]; "normalized" offsets index into
/// [`HashpipeHeaderNormalization::normalized_yaml`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashpipeLineMapping {
    /// The whole host line, including its line terminator (if any).
    pub host_line_range: Range<usize>,
    /// The part of the host line that survives normalization: everything after
    /// the leading spaces/tabs, the prefix and at most one following space or
    /// tab, up to (but excluding) the line terminator.
    pub host_stripped_range: Range<usize>,
    /// The same payload inside the normalized text, excluding the `\n`.
    pub normalized_content_range: Range<usize>,
    /// The payload inside the normalized text, including the trailing `\n`
    /// when the host line had a line terminator.
    pub normalized_line_range: Range<usize>,
    /// Length in bytes of the host line terminator: `2` for `\r\n`, `1` for
    /// `\n`, `0` when the line ends at the end of the input.
    pub host_newline_len: usize,
}
26
/// Result of stripping the hashpipe prefix from a leading run of header lines.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashpipeHeaderNormalization {
    /// The prefix that was requested and stripped (one of
    /// [`SUPPORTED_HASHPIPE_PREFIXES`]).
    pub prefix: String,
    /// Number of consecutive prefixed lines found at the start of the input.
    pub header_line_count: usize,
    /// Byte span of the header in the host text. It always starts at `0` and
    /// ends after the line terminator of the last header line.
    pub header_byte_span: Range<usize>,
    /// The header payloads with the prefix removed. Every host line terminator
    /// (`\n` or `\r\n`) is emitted as a single `\n`.
    pub normalized_yaml: String,
    /// One mapping per header line, in source order.
    pub line_mappings: Vec<HashpipeLineMapping>,
}
41
/// A single physical line of the input together with its byte offsets.
#[derive(Debug, Clone, Copy)]
struct LineSlice<'a> {
    /// Line text with the trailing `\n` / `\r\n` removed.
    line_without_newline: &'a str,
    /// Byte offset of the first byte of the line in the input.
    start: usize,
    /// Byte offset one past the end of the line, line terminator included.
    end: usize,
    /// Length of the line terminator in bytes: `0`, `1` (`\n`) or `2` (`\r\n`).
    newline_len: usize,
}
49
50pub fn normalize_hashpipe_header(
55 content: &str,
56 prefix: &str,
57) -> Option<HashpipeHeaderNormalization> {
58 if !SUPPORTED_HASHPIPE_PREFIXES.contains(&prefix) {
59 return None;
60 }
61
62 let lines = split_lines_with_offsets(content);
63 if lines.is_empty() {
64 return None;
65 }
66
67 let mut consumed = 0usize;
68 let mut saw_prefix = false;
69
70 while consumed < lines.len() {
71 let line = lines[consumed];
72 let trimmed = line.line_without_newline.trim_start_matches([' ', '\t']);
73
74 if trimmed.starts_with(prefix) {
75 saw_prefix = true;
76 consumed += 1;
77 continue;
78 }
79
80 break;
81 }
82
83 if !saw_prefix || consumed == 0 {
84 return None;
85 }
86
87 let header_end = lines[consumed - 1].end;
88 let mut normalized_yaml = String::new();
89 let mut line_mappings = Vec::with_capacity(consumed);
90 let mut normalized_pos = 0usize;
91
92 for line in &lines[..consumed] {
93 let stripped = strip_hashpipe_prefix_once(line.line_without_newline, prefix)?;
94
95 let trimmed_start = line.line_without_newline.trim_start_matches([' ', '\t']);
96 let leading_ws_len = line.line_without_newline.len() - trimmed_start.len();
97 let after_prefix = &trimmed_start[prefix.len()..];
98 let removed_space_len = usize::from(after_prefix.starts_with([' ', '\t']));
99 let host_stripped_start = line.start + leading_ws_len + prefix.len() + removed_space_len;
100 let host_stripped_end = line.start + line.line_without_newline.len();
101
102 let normalized_content_start = normalized_pos;
103 normalized_yaml.push_str(stripped);
104 normalized_pos += stripped.len();
105 if line.newline_len > 0 {
106 normalized_yaml.push('\n');
107 normalized_pos += 1;
108 }
109
110 line_mappings.push(HashpipeLineMapping {
111 host_line_range: line.start..line.end,
112 host_stripped_range: host_stripped_start..host_stripped_end,
113 normalized_content_range: normalized_content_start
114 ..(normalized_content_start + stripped.len()),
115 normalized_line_range: normalized_content_start..normalized_pos,
116 host_newline_len: line.newline_len,
117 });
118 }
119
120 Some(HashpipeHeaderNormalization {
121 prefix: prefix.to_string(),
122 header_line_count: consumed,
123 header_byte_span: 0..header_end,
124 normalized_yaml,
125 line_mappings,
126 })
127}
128
129fn split_lines_with_offsets(content: &str) -> Vec<LineSlice<'_>> {
130 let mut lines = Vec::new();
131 let mut idx = 0usize;
132 let bytes = content.as_bytes();
133
134 while idx < content.len() {
135 let mut end = idx;
136 while end < content.len() && bytes[end] != b'\n' {
137 end += 1;
138 }
139 if end < content.len() {
140 end += 1; }
142
143 let full = &content[idx..end];
144 let newline_len = if full.ends_with("\r\n") {
145 2
146 } else if full.ends_with('\n') {
147 1
148 } else {
149 0
150 };
151 let line_without_newline = &full[..full.len().saturating_sub(newline_len)];
152
153 lines.push(LineSlice {
154 line_without_newline,
155 start: idx,
156 end,
157 newline_len,
158 });
159
160 idx = end;
161 }
162
163 lines
164}
165
/// Removes one hashpipe prefix from the start of a line.
///
/// Leading spaces and tabs are skipped, then `prefix` must follow; otherwise
/// `None` is returned. After the prefix, at most one space or tab is dropped,
/// so any additional indentation of the payload is kept intact.
fn strip_hashpipe_prefix_once<'a>(line_without_newline: &'a str, prefix: &str) -> Option<&'a str> {
    let payload = line_without_newline
        .trim_start_matches([' ', '\t'])
        .strip_prefix(prefix)?;

    // Drop a single separator character if present, never more than one.
    Some(payload.strip_prefix([' ', '\t']).unwrap_or(payload))
}
177
#[cfg(test)]
mod tests {
    use super::normalize_hashpipe_header;

    /// Every supported prefix produces the same normalized text and spans.
    #[test]
    fn normalizes_supported_prefixes() {
        for prefix in ["#|", "//|", "--|"] {
            let input = format!("{prefix} echo: true\n{prefix} warning: false\nx <- 1\n");
            let normalized = normalize_hashpipe_header(&input, prefix).expect("expected header");
            assert_eq!(normalized.header_line_count, 2);
            assert_eq!(
                normalized.header_byte_span,
                0..(input.lines().take(2).map(|l| l.len() + 1).sum())
            );
            assert_eq!(normalized.normalized_yaml, "echo: true\nwarning: false\n");
            assert_eq!(normalized.line_mappings.len(), 2);
        }
    }

    /// Only one separator after the prefix is removed, so the continuation
    /// line of a quoted value keeps its remaining indentation.
    #[test]
    fn handles_multiline_quoted_value() {
        let input = "#| title: \"hello\n#|  world\"\n#| echo: true\nbody\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 3);
        assert_eq!(
            normalized.normalized_yaml,
            "title: \"hello\n world\"\necho: true\n"
        );
    }

    /// Indentation beyond the single separator survives for flow collections,
    /// block scalars and indented sequences.
    #[test]
    fn handles_flow_collection_and_block_scalar_and_indented_value() {
        let flow = "#| tags: [a,\n#|  b,\n#|  c]\ncode\n";
        let flow_norm = normalize_hashpipe_header(flow, "#|").expect("expected flow header");
        assert_eq!(flow_norm.header_line_count, 3);
        assert_eq!(flow_norm.normalized_yaml, "tags: [a,\n b,\n c]\n");

        let block_scalar = "#| fig-cap: |\n#|  one\n#|  two\n#| echo: true\n";
        let block_norm =
            normalize_hashpipe_header(block_scalar, "#|").expect("expected scalar header");
        assert_eq!(block_norm.header_line_count, 4);
        assert_eq!(
            block_norm.normalized_yaml,
            "fig-cap: |\n one\n two\necho: true\n"
        );

        let indented = "#| fig-cap:\n#|  - A\n#|  - B\nplot()\n";
        let indented_norm =
            normalize_hashpipe_header(indented, "#|").expect("expected indented header");
        assert_eq!(indented_norm.header_line_count, 3);
        assert_eq!(indented_norm.normalized_yaml, "fig-cap:\n - A\n - B\n");
    }

    /// Prefixed lines are consumed as long as they are contiguous, whatever
    /// their payload looks like.
    #[test]
    fn keeps_contiguous_prefixed_lines_even_when_not_option_shaped() {
        let input = "#| fig-subcap:\n#| - ROC\n#|  - PR Curve\nx <- 1\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 3);
        assert_eq!(
            normalized.normalized_yaml,
            "fig-subcap:\n- ROC\n - PR Curve\n"
        );
    }

    /// The header must start on the first line and stops at the first
    /// non-prefixed line.
    #[test]
    fn handles_no_header_and_partial_header() {
        assert!(normalize_hashpipe_header("plot(1:3)\n#| echo: true\n", "#|").is_none());

        let input = "#| echo: true\nplot(1:3)\n#| warning: false\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected leading header");
        assert_eq!(normalized.header_line_count, 1);
        assert_eq!(normalized.normalized_yaml, "echo: true\n");
        assert_eq!(normalized.header_byte_span.end, "#| echo: true\n".len());
    }

    /// CRLF terminators are reported with length 2 on the host side and
    /// written as a single `\n` in the normalized text.
    #[test]
    fn handles_crlf_deterministically() {
        let input = "#| echo: true\r\n#|  warning: false\r\nbody\r\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.header_line_count, 2);
        assert_eq!(normalized.normalized_yaml, "echo: true\n warning: false\n");
        assert_eq!(normalized.line_mappings[0].host_newline_len, 2);
        assert_eq!(normalized.line_mappings[1].host_newline_len, 2);
        assert_eq!(
            normalized.header_byte_span.end,
            "#| echo: true\r\n#|  warning: false\r\n".len()
        );
    }

    /// The host and normalized ranges of every mapping address the same text.
    #[test]
    fn line_mappings_point_back_into_host_text() {
        let input = "  #| a: 1\r\n#|\tb: 2\nrest\n";
        let normalized = normalize_hashpipe_header(input, "#|").expect("expected header");
        assert_eq!(normalized.normalized_yaml, "a: 1\nb: 2\n");
        assert_eq!(normalized.line_mappings.len(), 2);
        assert_eq!(normalized.line_mappings[0].host_stripped_range, 5..9);
        assert_eq!(normalized.line_mappings[1].host_stripped_range, 14..18);
        for mapping in &normalized.line_mappings {
            assert_eq!(
                &input[mapping.host_stripped_range.clone()],
                &normalized.normalized_yaml[mapping.normalized_content_range.clone()]
            );
        }
    }
}