1use perl_position_tracking::ByteSpan;
7use std::collections::VecDeque;
8use std::sync::Arc;
9
10pub use perl_position_tracking::ByteSpan as Span;
11
/// Quoting style recorded for a heredoc declaration.
///
/// NOTE(review): the variant names suggest `<<EOF`, `<<'EOF'`, `<<"EOF"` and
/// `` <<`EOF` `` respectively — confirm against the code that constructs
/// [`PendingHeredoc`]. Nothing in this file branches on the value.
#[derive(Debug, Copy, Clone)]
pub enum QuoteKind {
    /// Label written without quotes (presumably `<<EOF`).
    Unquoted,
    /// Label written in single quotes (presumably `<<'EOF'`).
    Single,
    /// Label written in double quotes (presumably `<<"EOF"`).
    Double,
    /// Label written in backticks (presumably `` <<`EOF` ``).
    Backtick,
}
24
/// A declared heredoc whose body has not been collected yet.
///
/// Instances are queued and handed to [`collect_all`], which collects one body
/// per entry in queue order.
#[derive(Debug, Clone)]
pub struct PendingHeredoc {
    /// Terminator label; `collect_one` compares it byte-for-byte against
    /// candidate terminator lines.
    pub label: Arc<str>,
    /// When `true`, the leading spaces/tabs of the terminator line are recorded
    /// as the baseline indent and that prefix is stripped from each content line.
    pub allow_indent: bool,
    /// Quoting style of the declaration. Not read by the collection functions
    /// in this file.
    pub quote: QuoteKind,
    /// Span associated with the declaration — presumably the heredoc operator
    /// and label in the source (TODO confirm). Not read by the collection
    /// functions in this file.
    pub decl_span: ByteSpan,
}
39
/// The collected body of one heredoc, expressed as byte spans into the source.
#[derive(Debug)]
pub struct HeredocContent {
    /// One span per content line, in source order. Line endings are excluded,
    /// and the baseline indent (if any) is already stripped from the start.
    pub segments: Vec<ByteSpan>,
    /// Span from the start of the first segment to the end of the last one.
    /// When there are no content lines this is an empty span at the offset
    /// where scanning stopped.
    pub full_span: ByteSpan,
    /// `true` if a terminator line was found before the end of the input.
    pub terminated: bool,
}
50
/// Outcome of collecting every pending heredoc with [`collect_all`].
#[derive(Debug)]
pub struct CollectionResult {
    /// Collected bodies, one per pending heredoc, in the order they were queued.
    pub contents: Vec<HeredocContent>,
    /// Parallel to `contents`: whether each heredoc's terminator was found.
    pub terminators_found: Vec<bool>,
    /// Byte offset at which collection of the last heredoc stopped: just past
    /// its terminator line (line ending included), or the end-of-scan offset if
    /// it was unterminated. Equals the input offset when nothing was pending.
    pub next_offset: usize,
}
61
62pub fn collect_all(
67 src: &[u8],
68 mut offset: usize,
69 mut pending: VecDeque<PendingHeredoc>,
70) -> CollectionResult {
71 let mut results = Vec::with_capacity(pending.len());
72 let mut terminators_found = Vec::with_capacity(pending.len());
73 while let Some(hd) = pending.pop_front() {
74 let (content, off2, found) = collect_one(src, offset, &hd);
75 results.push(content);
76 terminators_found.push(found);
77 offset = off2;
78 }
79 CollectionResult { contents: results, terminators_found, next_offset: offset }
80}
81
82fn collect_one(src: &[u8], mut off: usize, hd: &PendingHeredoc) -> (HeredocContent, usize, bool) {
88 #[derive(Debug)]
89 struct Line {
90 start: usize,
91 end_no_eol: usize,
92 } let mut raw_lines: Vec<Line> = Vec::new();
95 let mut baseline_indent: Vec<u8> = Vec::new();
96 let mut after_terminator_off = off;
97 let mut found = false;
98
99 while off < src.len() {
101 let (ls, le, next) = next_line_bounds(src, off);
102 let line = &src[ls..le];
103
104 let (lead_ws, rest) = split_leading_ws(line);
106 let rest_no_cr = strip_trailing_cr(rest);
107
108 if rest_no_cr == hd.label.as_bytes() {
109 if hd.allow_indent {
110 baseline_indent.clear();
111 baseline_indent.extend_from_slice(&line[..lead_ws]);
112 } else {
113 baseline_indent.clear();
114 }
115 after_terminator_off = next;
116 found = true;
117 break;
118 }
119
120 raw_lines.push(Line { start: ls, end_no_eol: le });
121 off = next;
122 }
123
124 let segments: Vec<ByteSpan> = raw_lines
125 .iter()
126 .map(|ln| {
127 if baseline_indent.is_empty() {
128 ByteSpan { start: ln.start, end: ln.end_no_eol }
129 } else {
130 let bytes = &src[ln.start..ln.end_no_eol];
131 let strip = common_prefix_len(bytes, &baseline_indent);
132 ByteSpan { start: ln.start + strip, end: ln.end_no_eol }
133 }
134 })
135 .collect();
136
137 let full_span = match (segments.first(), segments.last()) {
138 (Some(f), Some(l)) => ByteSpan { start: f.start, end: l.end },
139 _ => ByteSpan { start: off, end: off }, };
141
142 if !found {
143 return (HeredocContent { segments, full_span, terminated: false }, off, false);
145 }
146
147 (HeredocContent { segments, full_span, terminated: true }, after_terminator_off, true)
148}
149
/// Locates the line that begins at byte offset `off`.
///
/// Returns `(start, end_no_eol, next)` where `start == off`, `end_no_eol` is
/// the offset of the first `\n` or `\r` at or after `off` (or the end of the
/// input), and `next` is the offset just past the line ending. `\r\n` counts
/// as a single two-byte ending; a lone `\r` or `\n` is one byte. If `off` is at
/// or beyond the end of `src`, all three values equal `off`.
fn next_line_bounds(src: &[u8], off: usize) -> (usize, usize, usize) {
    // Everything from `off` onward; empty when `off` is out of range.
    let tail = src.get(off..).unwrap_or(&[]);

    let line_len = tail
        .iter()
        .position(|&byte| matches!(byte, b'\n' | b'\r'))
        .unwrap_or(tail.len());

    let eol_len = match &tail[line_len..] {
        [b'\r', b'\n', ..] => 2,
        [b'\r', ..] | [b'\n', ..] => 1,
        _ => 0,
    };

    let end_no_eol = off + line_len;
    (off, end_no_eol, end_no_eol + eol_len)
}
170
/// Splits `s` after its leading run of spaces and tabs.
///
/// Returns the length of that run and the remainder of the slice.
fn split_leading_ws(s: &[u8]) -> (usize, &[u8]) {
    let ws_len = s.iter().take_while(|&&byte| byte == b' ' || byte == b'\t').count();
    (ws_len, &s[ws_len..])
}
179
/// Returns `s` without its final byte when that byte is a carriage return;
/// otherwise returns `s` unchanged. At most one `\r` is removed.
fn strip_trailing_cr(s: &[u8]) -> &[u8] {
    match s.split_last() {
        Some((&b'\r', head)) => head,
        _ => s,
    }
}
184
/// Counts how many leading bytes `a` and `b` have in common.
///
/// The result never exceeds the length of the shorter slice.
fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
    a.iter().zip(b).take_while(|(left, right)| left == right).count()
}
194
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::VecDeque;
    use std::sync::Arc;

    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Builds an unquoted pending heredoc with a placeholder declaration span.
    fn pending(label: &str, allow_indent: bool) -> PendingHeredoc {
        PendingHeredoc {
            label: Arc::from(label),
            allow_indent,
            quote: QuoteKind::Unquoted,
            decl_span: ByteSpan { start: 0, end: 0 },
        }
    }

    /// Returns the UTF-8 text of `src` covered by `span`.
    fn slice(src: &[u8], span: ByteSpan) -> Result<&str, Box<dyn std::error::Error>> {
        Ok(std::str::from_utf8(&src[span.start..span.end])?)
    }

    #[test]
    fn collect_all_consumes_heredocs_in_fifo_order() -> TestResult {
        let src = b"one\nEOF\ntwo\nBAR\nrest";
        let mut pending_docs = VecDeque::new();
        pending_docs.push_back(pending("EOF", false));
        pending_docs.push_back(pending("BAR", false));

        let result = collect_all(src, 0, pending_docs);

        assert_eq!(result.terminators_found, vec![true, true]);
        assert_eq!(result.contents.len(), 2);
        assert_eq!(slice(src, result.contents[0].segments[0])?, "one");
        assert_eq!(slice(src, result.contents[1].segments[0])?, "two");
        // "one\nEOF\ntwo\nBAR\n" is 16 bytes; scanning resumes at "rest".
        assert_eq!(result.next_offset, 16);

        Ok(())
    }

    #[test]
    fn collect_all_strips_indented_heredoc_baseline_from_content_segments() -> TestResult {
        // Layout: 4-space line at bytes 0..9, 2-space line at 10..18, and the
        // 2-space terminator at 19..24. The terminator's indent (2 spaces) is
        // the baseline removed from each content line.
        let src = b"    first\n  second\n  EOF\nafter";
        let mut pending_docs = VecDeque::new();
        pending_docs.push_back(pending("EOF", true));

        let result = collect_all(src, 0, pending_docs);
        let content = &result.contents[0];

        assert_eq!(result.terminators_found, vec![true]);
        assert!(content.terminated);
        assert_eq!(slice(src, content.segments[0])?, "  first");
        assert_eq!(slice(src, content.segments[1])?, "second");
        assert_eq!(content.full_span, ByteSpan { start: 2, end: 18 });
        assert_eq!(result.next_offset, 25);

        Ok(())
    }

    #[test]
    fn collect_all_matches_crlf_terminators_without_including_line_endings() -> TestResult {
        let src = b"alpha\r\nEOF\r\nafter";
        let mut pending_docs = VecDeque::new();
        pending_docs.push_back(pending("EOF", false));

        let result = collect_all(src, 0, pending_docs);
        let content = &result.contents[0];

        assert_eq!(result.terminators_found, vec![true]);
        assert_eq!(slice(src, content.segments[0])?, "alpha");
        assert_eq!(content.full_span, ByteSpan { start: 0, end: 5 });
        // "alpha\r\n" (7 bytes) + "EOF\r\n" (5 bytes).
        assert_eq!(result.next_offset, 12);

        Ok(())
    }

    #[test]
    fn collect_all_reports_unterminated_content_and_stops_at_eof() -> TestResult {
        let src = b"alpha\nbeta";
        let mut pending_docs = VecDeque::new();
        pending_docs.push_back(pending("EOF", false));

        let result = collect_all(src, 0, pending_docs);
        let content = &result.contents[0];

        assert_eq!(result.terminators_found, vec![false]);
        assert!(!content.terminated);
        assert_eq!(content.segments.len(), 2);
        assert_eq!(slice(src, content.segments[0])?, "alpha");
        assert_eq!(slice(src, content.segments[1])?, "beta");
        assert_eq!(result.next_offset, src.len());

        Ok(())
    }

    #[test]
    fn collect_all_preserves_spaces_when_indent_is_not_allowed() -> TestResult {
        // Two leading spaces + "content" = 9 bytes, kept verbatim because the
        // heredoc is not an indented one.
        let src = b"  content\nEOF\n";
        let mut pending_docs = VecDeque::new();
        pending_docs.push_back(pending("EOF", false));

        let result = collect_all(src, 0, pending_docs);

        assert_eq!(slice(src, result.contents[0].segments[0])?, "  content");
        assert_eq!(result.contents[0].full_span, ByteSpan { start: 0, end: 9 });

        Ok(())
    }
}