/// Returns the offset just past the first line break at or after `off`.
///
/// A line break is `\n`, `\r`, or the pair `\r\n` (consumed as one break).
/// When no line break follows `off`, the result is `src.len()`; when `off`
/// already lies beyond the end of `src`, it is returned unchanged.
fn after_line_break(src: &[u8], off: usize) -> usize {
    // `get` yields `None` only when `off` is past the end of the slice.
    let tail = match src.get(off..) {
        Some(tail) => tail,
        None => return off,
    };

    match tail.iter().position(|&byte| byte == b'\n' || byte == b'\r') {
        // Ran off the end without meeting a line break.
        None => src.len(),
        Some(rel) => {
            let brk = off + rel;
            let is_crlf = src[brk] == b'\r' && src.get(brk + 1) == Some(&b'\n');
            if is_crlf {
                brk + 2
            } else {
                brk + 1
            }
        }
    }
}
/// Resolves backslash escapes in a heredoc label.
///
/// `\n`, `\r` and `\t` become the corresponding control characters; a
/// backslash followed by `\`, `"`, `'`, `$` or `@` yields that character
/// alone. Any other escape is kept verbatim (backslash included), and a
/// trailing lone backslash is preserved.
fn unescape_label(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut iter = s.chars();

    while let Some(ch) = iter.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        // `ch` is a backslash: decide based on the character that follows it.
        match iter.next() {
            None => result.push('\\'),
            Some('n') => result.push('\n'),
            Some('r') => result.push('\r'),
            Some('t') => result.push('\t'),
            Some(literal @ ('\\' | '"' | '\'' | '$' | '@')) => result.push(literal),
            Some(other) => {
                result.push('\\');
                result.push(other);
            }
        }
    }

    result
}
/// Splits a heredoc introducer into `(delimiter, interpolated, indented, command)`.
///
/// The first two characters of `s` are skipped unconditionally (callers are
/// expected to pass text starting with `<<`). An immediately following `~`
/// sets `indented`. The remaining text is trimmed and then interpreted:
///
/// * empty, or starting with `;` -> empty delimiter, interpolated
/// * `"LABEL"` -> unescaped label, interpolated
/// * `'LABEL'` -> label taken verbatim, not interpolated
/// * `` `LABEL` `` -> unescaped label, interpolated, `command` set
/// * anything else -> the text itself, interpolated
fn parse_heredoc_delimiter(s: &str) -> (String, bool, bool, bool) {
    /// Returns the text between a leading and a trailing `quote`, if both exist.
    fn strip_quotes(text: &str, quote: char) -> Option<&str> {
        text.strip_prefix(quote)?.strip_suffix(quote)
    }

    // Everything from the third character onwards; empty when `s` is shorter.
    let after_marker = s.char_indices().nth(2).map_or("", |(idx, _)| &s[idx..]);

    let (indented, tail) = match after_marker.strip_prefix('~') {
        Some(tail) => (true, tail),
        None => (false, after_marker),
    };

    let rest = tail.trim();
    if rest.is_empty() || rest.starts_with(';') {
        return (String::new(), true, indented, false);
    }

    let (delimiter, interpolated, command) = if let Some(inner) = strip_quotes(rest, '"') {
        (unescape_label(inner), true, false)
    } else if let Some(inner) = strip_quotes(rest, '\'') {
        (inner.to_string(), false, false)
    } else if let Some(inner) = strip_quotes(rest, '`') {
        (unescape_label(inner), true, true)
    } else {
        (rest.to_string(), true, false)
    };

    (delimiter, interpolated, indented, command)
}
/// Classifies the quoting style of a heredoc introducer such as `<<'EOF'`.
///
/// Leading `<` and `~` characters and surrounding whitespace are ignored.
/// The label counts as quoted only when it has both an opening and a closing
/// quote of the same kind; everything else is `Unquoted`.
///
/// `_interpolated` is accepted for call-site compatibility and is not used.
fn map_heredoc_quote_kind(text: &str, _interpolated: bool) -> heredoc_collector::QuoteKind {
    let rest = text.trim_start_matches('<').trim_start_matches('~').trim();

    // A lone quote character both starts and ends `rest`. Require at least two
    // bytes so the classification agrees with `parse_heredoc_delimiter`, which
    // treats such a label as unquoted.
    let wrapped_in =
        |quote: char| rest.len() >= 2 && rest.starts_with(quote) && rest.ends_with(quote);

    if wrapped_in('\'') {
        heredoc_collector::QuoteKind::Single
    } else if wrapped_in('"') {
        heredoc_collector::QuoteKind::Double
    } else if wrapped_in('`') {
        heredoc_collector::QuoteKind::Backtick
    } else {
        heredoc_collector::QuoteKind::Unquoted
    }
}
/// Maximum number of heredoc declarations that may be queued at once.
/// `push_heredoc_decl` records a syntax error and drops the declaration when
/// the pending queue already holds this many entries.
const MAX_HEREDOC_DEPTH: usize = 100;
/// Time budget, in milliseconds, for a batch of pending heredocs. The clock
/// starts when a declaration is pushed onto an empty queue; if more than this
/// has elapsed when `drain_pending_heredocs` runs, it records a syntax error
/// and discards the pending declarations.
const HEREDOC_TIMEOUT_MS: u64 = 5000;
impl<'a> Parser<'a> {
fn push_heredoc_decl(
&mut self,
label: String,
allow_indent: bool,
quote: heredoc_collector::QuoteKind,
decl_start: usize,
decl_end: usize,
) {
if self.pending_heredocs.len() >= MAX_HEREDOC_DEPTH {
self.errors.push(ParseError::syntax(
format!("Heredoc depth limit exceeded (max {})", MAX_HEREDOC_DEPTH),
decl_start,
));
return;
}
if self.pending_heredocs.is_empty() {
self.heredoc_start_time = Some(Instant::now());
}
self.pending_heredocs.push_back(PendingHeredoc {
label: Arc::from(label.as_str()),
allow_indent,
quote,
decl_span: heredoc_collector::Span { start: decl_start, end: decl_end },
});
}
fn drain_pending_heredocs(&mut self, root: &mut Node) {
if self.pending_heredocs.is_empty() {
self.heredoc_start_time = None;
return;
}
if let Some(start) = self.heredoc_start_time {
if start.elapsed().as_millis() > HEREDOC_TIMEOUT_MS as u128 {
self.errors.push(ParseError::syntax(
format!("Heredoc parsing timed out (> {}ms)", HEREDOC_TIMEOUT_MS),
self.byte_cursor,
));
self.pending_heredocs.clear();
self.heredoc_start_time = None;
return;
}
}
self.byte_cursor = after_line_break(self.src_bytes, self.byte_cursor);
let pending: Vec<_> = self.pending_heredocs.iter().cloned().collect();
let out = collect_all(
self.src_bytes,
self.byte_cursor,
std::mem::take(&mut self.pending_heredocs),
);
for (decl, body) in pending.into_iter().zip(out.contents.into_iter()) {
let mut attached = self.try_attach_heredoc_at_node(root, decl.decl_span, &body);
if !attached {
attached = self.try_attach_next_unresolved_heredoc(root, &body);
}
if !body.terminated {
let label = if decl.label.is_empty() { "<empty>" } else { decl.label.as_ref() };
self.errors.push(ParseError::SyntaxError {
message: format!("Unterminated heredoc: {}", label),
location: self.src_bytes.len(),
});
}
#[cfg(debug_assertions)]
if !attached {
eprintln!(
"[WARNING] drain_pending_heredocs: Failed to attach heredoc content at span {}..{} - no matching Heredoc node found in AST",
decl.decl_span.start, decl.decl_span.end
);
}
}
self.byte_cursor = out.next_offset;
}
fn try_attach_heredoc_at_node(
&self,
root: &mut Node,
decl_span: heredoc_collector::Span,
body: &HeredocContent,
) -> bool {
self.try_attach_at_node(root, decl_span, body)
}
fn try_attach_at_node(
&self,
node: &mut Node,
decl_span: heredoc_collector::Span,
body: &HeredocContent,
) -> bool {
let node_matches =
node.location.start == decl_span.start && node.location.end == decl_span.end;
if node_matches {
if let NodeKind::Heredoc { content, body_span, .. } = &mut node.kind {
let mut s = String::new();
for (i, seg) in body.segments.iter().enumerate() {
if seg.end > seg.start {
let bytes = &self.src_bytes[seg.start..seg.end];
s.push_str(std::str::from_utf8(bytes).unwrap_or_default());
}
if i + 1 < body.segments.len() {
s.push('\n');
}
}
*content = s;
*body_span = if body.full_span.start < body.full_span.end {
Some(SourceLocation {
start: body.full_span.start,
end: body.full_span.end,
})
} else {
None };
return true;
}
}
let mut found = false;
node.for_each_child_mut(|child| {
if !found && self.try_attach_at_node(child, decl_span, body) {
found = true;
}
});
#[cfg(debug_assertions)]
if !found && node_matches {
eprintln!(
"warn: no Heredoc node found for decl span {}..{} (matched span but not Heredoc kind)",
decl_span.start, decl_span.end
);
}
found
}
fn try_attach_next_unresolved_heredoc(&self, root: &mut Node, body: &HeredocContent) -> bool {
self.try_attach_at_next_unresolved_node(root, body)
}
fn try_attach_at_next_unresolved_node(&self, node: &mut Node, body: &HeredocContent) -> bool {
if let NodeKind::Heredoc { content, body_span, .. } = &mut node.kind {
let unresolved = content.is_empty() && body_span.is_none();
if unresolved {
let mut text = String::new();
for (i, seg) in body.segments.iter().enumerate() {
if seg.end > seg.start {
let bytes = &self.src_bytes[seg.start..seg.end];
text.push_str(std::str::from_utf8(bytes).unwrap_or_default());
}
if i + 1 < body.segments.len() {
text.push('\n');
}
}
*content = text;
*body_span = if body.full_span.start < body.full_span.end {
Some(SourceLocation {
start: body.full_span.start,
end: body.full_span.end,
})
} else {
None
};
return true;
}
}
let mut found = false;
node.for_each_child_mut(|child| {
if !found && self.try_attach_at_next_unresolved_node(child, body) {
found = true;
}
});
found
}
}
#[cfg(test)]
mod heredoc_fuzz_tests {
    use super::{map_heredoc_quote_kind, parse_heredoc_delimiter};
    use crate::engine::parser::heredoc_collector::QuoteKind;
    use proptest::prelude::*;

    /// Strategy producing arbitrary strings of 0 to 63 `char`s.
    fn heredoc_text_strategy() -> impl Strategy<Value = String> {
        prop::collection::vec(any::<char>(), 0..64)
            .prop_map(|chars| chars.iter().collect::<String>())
    }

    proptest! {
        /// Arbitrary text after `<<` must parse without panicking, and the
        /// delimiter can never be longer than the input.
        #[test]
        fn fuzz_parse_heredoc_delimiter_never_panics(input in heredoc_text_strategy()) {
            let mut source = String::from("<<");
            source.push_str(&input);
            let delimiter = parse_heredoc_delimiter(&source).0;
            prop_assert!(delimiter.len() <= source.len());
        }

        /// A `~` directly after `<<` always sets the indent flag.
        #[test]
        fn fuzz_tilde_prefix_sets_indent_flag(payload in heredoc_text_strategy()) {
            let source = ["<<~", payload.as_str()].concat();
            let indented = parse_heredoc_delimiter(&source).2;
            prop_assert!(indented);
        }

        /// The quote kind is decided by the outermost quote characters alone.
        #[test]
        fn fuzz_quote_kind_matches_outer_delimiters(label in "[^\n\r']{0,32}") {
            let wrap = |quote: char| format!("<<{quote}{label}{quote}");

            let single = wrap('\'');
            prop_assert!(matches!(map_heredoc_quote_kind(&single, false), QuoteKind::Single));

            let double = wrap('"');
            prop_assert!(matches!(map_heredoc_quote_kind(&double, true), QuoteKind::Double));

            let backtick = wrap('`');
            prop_assert!(matches!(map_heredoc_quote_kind(&backtick, true), QuoteKind::Backtick));
        }
    }
}