use php_ast::*;
use crate::diagnostics::ParseError;
use crate::version::PhpVersion;
/// Splits the body of an interpolated string into literal and expression parts.
///
/// `inner` is the string body and `base_offset` is the byte offset that is
/// added to every index into `inner` when building spans and when slicing
/// names out of `source` (so `inner` is expected to be the slice of `source`
/// that starts at `base_offset`).
///
/// Recognised forms:
/// * backslash escapes, decoded through `decode_escape_at`;
/// * `${name}`, `${name[index]}` and `${$expr}`;
/// * `$name`, optionally followed by `->prop` and/or `[index]`;
/// * `{$expr}`, handed to `parse_complex_interpolation`.
///
/// Literal text is borrowed from `inner` until the first escape in a run; from
/// then on it is accumulated in an owned buffer. Malformed constructs are
/// reported by pushing onto `errors`; parsing always continues.
pub fn parse_interpolated_parts<'arena, 'src>(
    arena: &'arena bumpalo::Bump,
    source: &'src str,
    inner: &'src str,
    base_offset: u32,
    version: PhpVersion,
    errors: &mut Vec<ParseError>,
) -> ArenaVec<'arena, StringPart<'arena, 'src>> {
    let mut parts = ArenaVec::with_capacity_in(8, arena);
    let bytes = inner.as_bytes();
    let len = bytes.len();
    let mut i = 0;
    // Start of the pending literal run while it can still be borrowed from `inner`.
    let mut literal_start = 0usize;
    // Decoded literal text; only created once an escape forces a copy.
    let mut owned: Option<String> = None;
    while i < len {
        match bytes[i] {
            b'\\' => {
                let buf = owned.get_or_insert_with(|| inner[literal_start..i].to_string());
                i = decode_escape_at(bytes, inner, i, buf, errors, base_offset, true);
            }
            b'$' => {
                if i + 1 < len && bytes[i + 1] == b'{' {
                    // Flush the literal that precedes `${`.
                    if let Some(buf) = owned.take() {
                        if !buf.is_empty() {
                            parts.push(StringPart::Literal(arena.alloc_str(&buf)));
                        }
                    } else if i > literal_start {
                        parts.push(StringPart::Literal(
                            arena.alloc_str(&inner[literal_start..i]),
                        ));
                    }
                    let var_offset = base_offset + i as u32;
                    i += 2;
                    if i < len && bytes[i] == b'$' {
                        // `${$expr}`: find the matching `}` and parse what is inside.
                        let expr_start = i;
                        let mut depth = 1usize;
                        while i < len && depth > 0 {
                            match bytes[i] {
                                b'{' => depth += 1,
                                b'}' => depth -= 1,
                                _ => {}
                            }
                            if depth > 0 {
                                i += 1;
                            }
                        }
                        let inner_expr = parse_complex_interpolation(
                            arena,
                            source,
                            base_offset + expr_start as u32,
                            base_offset + i as u32,
                            version,
                        );
                        if i < len {
                            // Step over the closing `}`.
                            i += 1;
                        } else {
                            errors.push(ParseError::Forbidden {
                                message: "unclosed '${' in string interpolation".into(),
                                span: Span::new(var_offset, base_offset + i as u32),
                            });
                        }
                        parts.push(StringPart::Expr(Expr {
                            kind: ExprKind::VariableVariable(arena.alloc(inner_expr)),
                            span: Span::new(var_offset, base_offset + i as u32),
                        }));
                    } else {
                        // `${name}` or `${name[index]}`.
                        let name_start = i;
                        while i < len && is_var_char(bytes[i]) {
                            i += 1;
                        }
                        let var_name: &'src str =
                            &source[base_offset as usize + name_start..base_offset as usize + i];
                        let mut expr = Expr {
                            kind: ExprKind::Variable(NameStr::Src(var_name)),
                            span: Span::new(
                                base_offset + name_start as u32,
                                base_offset + i as u32,
                            ),
                        };
                        // Avoids reporting both "unclosed '['" and "unclosed '${'"
                        // for the same construct.
                        let mut reported_unclosed = false;
                        if i < len && bytes[i] == b'[' {
                            let bracket_offset = base_offset + i as u32;
                            i += 1;
                            let idx_start = i;
                            while i < len && bytes[i] != b']' && bytes[i] != b'}' {
                                i += 1;
                            }
                            if i < len && bytes[i] == b']' {
                                let idx_str = &inner[idx_start..i];
                                i += 1;
                                if idx_str.is_empty() {
                                    errors.push(ParseError::Forbidden {
                                        message: "empty index in string interpolation".into(),
                                        span: Span::new(bracket_offset, base_offset + i as u32),
                                    });
                                } else {
                                    let idx_offset = base_offset + idx_start as u32;
                                    let idx_end = base_offset + (i - 1) as u32;
                                    let index_expr = parse_simple_index(
                                        arena, source, idx_str, idx_offset, idx_end,
                                    );
                                    let span = Span::new(var_offset, base_offset + i as u32);
                                    expr = Expr {
                                        kind: ExprKind::ArrayAccess(ArrayAccessExpr {
                                            array: arena.alloc(expr),
                                            index: Some(arena.alloc(index_expr)),
                                        }),
                                        span,
                                    };
                                }
                            } else {
                                errors.push(ParseError::Forbidden {
                                    message: "unclosed '[' in string offset interpolation".into(),
                                    span: Span::new(bracket_offset, base_offset + i as u32),
                                });
                                reported_unclosed = true;
                            }
                        }
                        // Anything else before the closing `}` is skipped.
                        while i < len && bytes[i] != b'}' {
                            i += 1;
                        }
                        if i < len {
                            i += 1;
                        } else if !reported_unclosed {
                            // Same diagnostic as the `${$expr}` form instead of
                            // silently swallowing the rest of the string.
                            errors.push(ParseError::Forbidden {
                                message: "unclosed '${' in string interpolation".into(),
                                span: Span::new(var_offset, base_offset + i as u32),
                            });
                        }
                        parts.push(StringPart::Expr(expr));
                    }
                    literal_start = i;
                } else if i + 1 < len && is_var_start(bytes[i + 1]) {
                    // Simple `$name`, `$name->prop`, `$name[index]`.
                    if let Some(buf) = owned.take() {
                        if !buf.is_empty() {
                            parts.push(StringPart::Literal(arena.alloc_str(&buf)));
                        }
                    } else if i > literal_start {
                        parts.push(StringPart::Literal(
                            arena.alloc_str(&inner[literal_start..i]),
                        ));
                    }
                    let var_start = i;
                    i += 1;
                    let name_start = i;
                    while i < len && is_var_char(bytes[i]) {
                        i += 1;
                    }
                    let var_name: &'src str =
                        &source[base_offset as usize + name_start..base_offset as usize + i];
                    let var_offset = base_offset + var_start as u32;
                    let mut expr = Expr {
                        kind: ExprKind::Variable(NameStr::Src(var_name)),
                        span: Span::new(var_offset, base_offset + i as u32),
                    };
                    if i + 2 < len && bytes[i] == b'-' && bytes[i + 1] == b'>' {
                        let prop_start = i + 2;
                        // `->` only counts when an identifier follows it.
                        if prop_start < len && is_var_start(bytes[prop_start]) {
                            i = prop_start;
                            let pname_start = i;
                            while i < len && is_var_char(bytes[i]) {
                                i += 1;
                            }
                            let prop_name: &'src str = &source
                                [base_offset as usize + pname_start..base_offset as usize + i];
                            let prop_span =
                                Span::new(base_offset + pname_start as u32, base_offset + i as u32);
                            let span = Span::new(var_offset, base_offset + i as u32);
                            expr = Expr {
                                kind: ExprKind::PropertyAccess(PropertyAccessExpr {
                                    object: arena.alloc(expr),
                                    property: arena.alloc(Expr {
                                        kind: ExprKind::Identifier(NameStr::Src(prop_name)),
                                        span: prop_span,
                                    }),
                                }),
                                span,
                            };
                        }
                    }
                    if i < len && bytes[i] == b'[' {
                        let bracket_start = i;
                        i += 1;
                        let idx_start = i;
                        while i < len && bytes[i] != b']' {
                            i += 1;
                        }
                        if i < len && bytes[i] == b']' {
                            let idx_str = &inner[idx_start..i];
                            i += 1;
                            if idx_str.is_empty() {
                                errors.push(ParseError::Forbidden {
                                    message: "empty index in string interpolation".into(),
                                    span: Span::new(
                                        base_offset + bracket_start as u32,
                                        base_offset + i as u32,
                                    ),
                                });
                            } else {
                                let idx_offset = base_offset + idx_start as u32;
                                let idx_end = base_offset + (i - 1) as u32;
                                let index_expr =
                                    parse_simple_index(arena, source, idx_str, idx_offset, idx_end);
                                let span = Span::new(var_offset, base_offset + i as u32);
                                expr = Expr {
                                    kind: ExprKind::ArrayAccess(ArrayAccessExpr {
                                        array: arena.alloc(expr),
                                        index: Some(arena.alloc(index_expr)),
                                    }),
                                    span,
                                };
                            }
                        } else {
                            // The scan ran to the end of the body: report it, as
                            // the `${name[` form does, rather than dropping the
                            // remaining text without a diagnostic.
                            errors.push(ParseError::Forbidden {
                                message: "unclosed '[' in string offset interpolation".into(),
                                span: Span::new(
                                    base_offset + bracket_start as u32,
                                    base_offset + i as u32,
                                ),
                            });
                        }
                    }
                    parts.push(StringPart::Expr(expr));
                    literal_start = i;
                } else {
                    // A `$` that starts nothing is literal text.
                    if let Some(ref mut buf) = owned {
                        buf.push('$');
                    }
                    i += 1;
                }
            }
            b'{' if i + 1 < len && bytes[i + 1] == b'$' => {
                if let Some(buf) = owned.take() {
                    if !buf.is_empty() {
                        parts.push(StringPart::Literal(arena.alloc_str(&buf)));
                    }
                } else if i > literal_start {
                    parts.push(StringPart::Literal(
                        arena.alloc_str(&inner[literal_start..i]),
                    ));
                }
                let brace_offset = base_offset + i as u32;
                i += 1;
                let expr_start = i;
                let mut depth = 1;
                while i < len && depth > 0 {
                    match bytes[i] {
                        b'{' => depth += 1,
                        b'}' => depth -= 1,
                        b'\'' | b'"' => {
                            // Braces inside a quoted string do not count.
                            let quote = bytes[i];
                            i += 1;
                            while i < len && bytes[i] != quote {
                                if bytes[i] == b'\\' {
                                    i += 1;
                                }
                                i += 1;
                            }
                            // A trailing backslash can push `i` to `len + 1`;
                            // clamp so later spans never point past the body.
                            i = if i < len { i + 1 } else { len };
                            continue;
                        }
                        _ => {}
                    }
                    if depth > 0 {
                        i += 1;
                    }
                }
                let expr_end = i;
                if depth == 0 {
                    // Step over the closing `}`.
                    i += 1;
                } else {
                    errors.push(ParseError::Forbidden {
                        message: "unclosed '{' in string interpolation".into(),
                        span: Span::new(brace_offset, base_offset + expr_end as u32),
                    });
                }
                let expr_offset = base_offset + expr_start as u32;
                let end_offset = base_offset + expr_end as u32;
                let expr =
                    parse_complex_interpolation(arena, source, expr_offset, end_offset, version);
                if matches!(
                    expr.kind,
                    ExprKind::ClassConstAccess(_) | ExprKind::ClassConstAccessDynamic { .. }
                ) {
                    errors.push(ParseError::Forbidden {
                        message: "class constant access is not valid as a standalone interpolation expression".into(),
                        span: expr.span,
                    });
                }
                parts.push(StringPart::Expr(expr));
                literal_start = i;
            }
            _ => {
                // Consume one whole UTF-8 character. Pushing `bytes[i] as char`
                // would turn each byte of a multi-byte character into a
                // separate Latin-1 code point.
                let mut next = i + 1;
                while next < len && !inner.is_char_boundary(next) {
                    next += 1;
                }
                if let Some(buf) = owned.as_mut() {
                    match inner.get(i..next) {
                        Some(ch) => buf.push_str(ch),
                        // `i` is inside a character whose lead byte was already
                        // consumed by an escape; keep the byte-wise fallback.
                        None => buf.extend(bytes[i..next].iter().map(|&b| b as char)),
                    }
                }
                i = next;
            }
        }
    }
    // Flush whatever literal text is left after the last expression.
    if let Some(buf) = owned {
        if !buf.is_empty() {
            parts.push(StringPart::Literal(arena.alloc_str(&buf)));
        }
    } else if i > literal_start {
        parts.push(StringPart::Literal(
            arena.alloc_str(&inner[literal_start..i]),
        ));
    }
    parts
}
/// Like `parse_interpolated_parts`, but removes `indent` from the start of the
/// body and from the start of every line that follows a `\n`.
///
/// `raw_body` is the body text and `body_offset` is added to every index into
/// it when building spans (so `raw_body` is expected to be the slice of
/// `source` that starts at `body_offset`). A line that does not begin with
/// `indent` is kept unchanged. Literal text is always accumulated in an owned
/// buffer because indentation removal and escape decoding both rewrite it.
///
/// Recognised forms: backslash escapes, `${name}`, `${name[index]}`,
/// `${$expr}`, `$name` (optionally with `->prop` and/or `[index]`) and
/// `{$expr}`. Malformed constructs are reported through `errors`.
pub fn parse_interpolated_parts_indented<'arena, 'src>(
    arena: &'arena bumpalo::Bump,
    source: &'src str,
    raw_body: &'src str,
    body_offset: u32,
    indent: &str,
    version: PhpVersion,
    errors: &mut Vec<ParseError>,
) -> ArenaVec<'arena, StringPart<'arena, 'src>> {
    let indent_bytes = indent.as_bytes();
    let indent_len = indent_bytes.len();
    let mut parts: ArenaVec<'arena, StringPart<'arena, 'src>> =
        ArenaVec::with_capacity_in(4, arena);
    let mut literal = String::new();
    let bytes = raw_body.as_bytes();
    let len = bytes.len();
    // The indent is compared byte-wise: slicing `raw_body` by `indent_len`
    // would panic when a mis-indented line puts the slice end inside a
    // multi-byte character.
    let mut i = if indent_len > 0 && bytes.starts_with(indent_bytes) {
        indent_len
    } else {
        0
    };
    while i < len {
        match bytes[i] {
            b'\\' => {
                i = decode_escape_at(bytes, raw_body, i, &mut literal, errors, body_offset, false);
            }
            b'\n' => {
                literal.push('\n');
                i += 1;
                if indent_len > 0 && bytes[i..].starts_with(indent_bytes) {
                    i += indent_len;
                }
            }
            b'$' => {
                if i + 1 < len && bytes[i + 1] == b'{' {
                    // `${...}`: handled the same way as in
                    // `parse_interpolated_parts`, instead of being emitted as
                    // literal text.
                    if !literal.is_empty() {
                        parts.push(StringPart::Literal(arena.alloc_str(&literal)));
                        literal.clear();
                    }
                    let var_offset = body_offset + i as u32;
                    i += 2;
                    if i < len && bytes[i] == b'$' {
                        // `${$expr}`: find the matching `}` and parse what is inside.
                        let expr_start = i;
                        let mut depth = 1usize;
                        while i < len && depth > 0 {
                            match bytes[i] {
                                b'{' => depth += 1,
                                b'}' => depth -= 1,
                                _ => {}
                            }
                            if depth > 0 {
                                i += 1;
                            }
                        }
                        let inner_expr = parse_complex_interpolation(
                            arena,
                            source,
                            body_offset + expr_start as u32,
                            body_offset + i as u32,
                            version,
                        );
                        if i < len {
                            i += 1;
                        } else {
                            errors.push(ParseError::Forbidden {
                                message: "unclosed '${' in string interpolation".into(),
                                span: Span::new(var_offset, body_offset + i as u32),
                            });
                        }
                        parts.push(StringPart::Expr(Expr {
                            kind: ExprKind::VariableVariable(arena.alloc(inner_expr)),
                            span: Span::new(var_offset, body_offset + i as u32),
                        }));
                    } else {
                        // `${name}` or `${name[index]}`.
                        let name_start = i;
                        while i < len && is_var_char(bytes[i]) {
                            i += 1;
                        }
                        let var_name: &'src str = &raw_body[name_start..i];
                        let mut expr = Expr {
                            kind: ExprKind::Variable(NameStr::Src(var_name)),
                            span: Span::new(
                                body_offset + name_start as u32,
                                body_offset + i as u32,
                            ),
                        };
                        // Avoids two diagnostics for the same construct.
                        let mut reported_unclosed = false;
                        if i < len && bytes[i] == b'[' {
                            let bracket_offset = body_offset + i as u32;
                            i += 1;
                            let idx_start = i;
                            while i < len && bytes[i] != b']' && bytes[i] != b'}' {
                                i += 1;
                            }
                            if i < len && bytes[i] == b']' {
                                let idx_str = &raw_body[idx_start..i];
                                i += 1;
                                if idx_str.is_empty() {
                                    errors.push(ParseError::Forbidden {
                                        message: "empty index in string interpolation".into(),
                                        span: Span::new(bracket_offset, body_offset + i as u32),
                                    });
                                } else {
                                    let idx_offset = body_offset + idx_start as u32;
                                    let idx_end = body_offset + (i - 1) as u32;
                                    let index_expr = parse_simple_index(
                                        arena, source, idx_str, idx_offset, idx_end,
                                    );
                                    let span = Span::new(var_offset, body_offset + i as u32);
                                    expr = Expr {
                                        kind: ExprKind::ArrayAccess(ArrayAccessExpr {
                                            array: arena.alloc(expr),
                                            index: Some(arena.alloc(index_expr)),
                                        }),
                                        span,
                                    };
                                }
                            } else {
                                errors.push(ParseError::Forbidden {
                                    message: "unclosed '[' in string offset interpolation".into(),
                                    span: Span::new(bracket_offset, body_offset + i as u32),
                                });
                                reported_unclosed = true;
                            }
                        }
                        // Anything else before the closing `}` is skipped.
                        while i < len && bytes[i] != b'}' {
                            i += 1;
                        }
                        if i < len {
                            i += 1;
                        } else if !reported_unclosed {
                            errors.push(ParseError::Forbidden {
                                message: "unclosed '${' in string interpolation".into(),
                                span: Span::new(var_offset, body_offset + i as u32),
                            });
                        }
                        parts.push(StringPart::Expr(expr));
                    }
                } else if i + 1 < len && is_var_start(bytes[i + 1]) {
                    // Simple `$name`, `$name->prop`, `$name[index]`.
                    if !literal.is_empty() {
                        parts.push(StringPart::Literal(arena.alloc_str(&literal)));
                        literal.clear();
                    }
                    let var_start = i;
                    i += 1;
                    let name_start = i;
                    while i < len && is_var_char(bytes[i]) {
                        i += 1;
                    }
                    let var_name: &'src str = &raw_body[name_start..i];
                    let var_offset = body_offset + var_start as u32;
                    let mut expr = Expr {
                        kind: ExprKind::Variable(NameStr::Src(var_name)),
                        span: Span::new(var_offset, body_offset + i as u32),
                    };
                    if i + 2 < len && bytes[i] == b'-' && bytes[i + 1] == b'>' {
                        let prop_start = i + 2;
                        // `->` only counts when an identifier follows it.
                        if prop_start < len && is_var_start(bytes[prop_start]) {
                            i = prop_start;
                            let pname_start = i;
                            while i < len && is_var_char(bytes[i]) {
                                i += 1;
                            }
                            let prop_name: &'src str = &raw_body[pname_start..i];
                            let prop_span =
                                Span::new(body_offset + pname_start as u32, body_offset + i as u32);
                            let span = Span::new(var_offset, body_offset + i as u32);
                            expr = Expr {
                                kind: ExprKind::PropertyAccess(PropertyAccessExpr {
                                    object: arena.alloc(expr),
                                    property: arena.alloc(Expr {
                                        kind: ExprKind::Identifier(NameStr::Src(prop_name)),
                                        span: prop_span,
                                    }),
                                }),
                                span,
                            };
                        }
                    }
                    if i < len && bytes[i] == b'[' {
                        let bracket_start = i;
                        i += 1;
                        let idx_start = i;
                        while i < len && bytes[i] != b']' {
                            i += 1;
                        }
                        if i < len && bytes[i] == b']' {
                            let idx_str = &raw_body[idx_start..i];
                            i += 1;
                            if idx_str.is_empty() {
                                errors.push(ParseError::Forbidden {
                                    message: "empty index in string interpolation".into(),
                                    span: Span::new(
                                        body_offset + bracket_start as u32,
                                        body_offset + i as u32,
                                    ),
                                });
                            } else {
                                let idx_offset = body_offset + idx_start as u32;
                                let idx_end = body_offset + (i - 1) as u32;
                                let index_expr =
                                    parse_simple_index(arena, source, idx_str, idx_offset, idx_end);
                                let span = Span::new(var_offset, body_offset + i as u32);
                                expr = Expr {
                                    kind: ExprKind::ArrayAccess(ArrayAccessExpr {
                                        array: arena.alloc(expr),
                                        index: Some(arena.alloc(index_expr)),
                                    }),
                                    span,
                                };
                            }
                        } else {
                            // The scan ran to the end of the body: report it
                            // rather than dropping the remaining text without a
                            // diagnostic.
                            errors.push(ParseError::Forbidden {
                                message: "unclosed '[' in string offset interpolation".into(),
                                span: Span::new(
                                    body_offset + bracket_start as u32,
                                    body_offset + i as u32,
                                ),
                            });
                        }
                    }
                    parts.push(StringPart::Expr(expr));
                } else {
                    // A `$` that starts nothing is literal text.
                    literal.push('$');
                    i += 1;
                }
            }
            b'{' if i + 1 < len && bytes[i + 1] == b'$' => {
                if !literal.is_empty() {
                    parts.push(StringPart::Literal(arena.alloc_str(&literal)));
                    literal.clear();
                }
                let brace_offset = body_offset + i as u32;
                i += 1;
                let expr_start = i;
                let mut depth = 1;
                while i < len && depth > 0 {
                    match bytes[i] {
                        b'{' => depth += 1,
                        b'}' => depth -= 1,
                        b'\'' | b'"' => {
                            // Braces inside a quoted string do not count.
                            let quote = bytes[i];
                            i += 1;
                            while i < len && bytes[i] != quote {
                                if bytes[i] == b'\\' {
                                    i += 1;
                                }
                                i += 1;
                            }
                            // A trailing backslash can push `i` to `len + 1`;
                            // clamp so later spans never point past the body.
                            i = if i < len { i + 1 } else { len };
                            continue;
                        }
                        _ => {}
                    }
                    if depth > 0 {
                        i += 1;
                    }
                }
                let expr_end = i;
                if depth == 0 {
                    // Step over the closing `}`.
                    i += 1;
                } else {
                    errors.push(ParseError::Forbidden {
                        message: "unclosed '{' in string interpolation".into(),
                        span: Span::new(brace_offset, body_offset + expr_end as u32),
                    });
                }
                let expr_offset = body_offset + expr_start as u32;
                let end_offset = body_offset + expr_end as u32;
                let expr =
                    parse_complex_interpolation(arena, source, expr_offset, end_offset, version);
                if matches!(
                    expr.kind,
                    ExprKind::ClassConstAccess(_) | ExprKind::ClassConstAccessDynamic { .. }
                ) {
                    errors.push(ParseError::Forbidden {
                        message: "class constant access is not valid as a standalone interpolation expression".into(),
                        span: expr.span,
                    });
                }
                parts.push(StringPart::Expr(expr));
            }
            _ => {
                // Copy one whole UTF-8 character. Pushing `bytes[i] as char`
                // would turn each byte of a multi-byte character into a
                // separate Latin-1 code point.
                let mut next = i + 1;
                while next < len && !raw_body.is_char_boundary(next) {
                    next += 1;
                }
                match raw_body.get(i..next) {
                    Some(ch) => literal.push_str(ch),
                    // `i` is inside a character whose lead byte was already
                    // consumed by an escape; keep the byte-wise fallback.
                    None => literal.extend(bytes[i..next].iter().map(|&b| b as char)),
                }
                i = next;
            }
        }
    }
    if !literal.is_empty() {
        parts.push(StringPart::Literal(arena.alloc_str(&literal)));
    }
    parts
}
/// Decodes the escape sequence whose backslash is at `bytes[i]`, appends the
/// result to `out`, and returns the index of the first byte after the escape.
///
/// * `bytes` / `text` — the same string body, as bytes and as `str`.
/// * `errors` — receives diagnostics for malformed `\u{...}` escapes.
/// * `span_base` — added to body indices when building diagnostic spans.
/// * `stop_at_dquote` — when recovering from a malformed `\u{...}`, also stop
///   scanning at a `"`.
///
/// Unknown escapes are kept verbatim (backslash plus the following character).
/// A lone trailing backslash is kept as a backslash.
fn decode_escape_at(
    bytes: &[u8],
    text: &str,
    i: usize,
    out: &mut String,
    errors: &mut Vec<ParseError>,
    span_base: u32,
    stop_at_dquote: bool,
) -> usize {
    let len = bytes.len();
    if i + 1 >= len {
        out.push('\\');
        return i + 1;
    }
    let next = bytes[i + 1];

    // Two-byte escapes that map to exactly one character.
    let simple = match next {
        b'$' => Some('$'),
        b'\\' => Some('\\'),
        b'n' => Some('\n'),
        b'r' => Some('\r'),
        b't' => Some('\t'),
        b'v' => Some('\x0B'),
        b'e' => Some('\x1B'),
        b'f' => Some('\x0C'),
        b'"' => Some('"'),
        _ => None,
    };
    if let Some(decoded) = simple {
        out.push(decoded);
        return i + 2;
    }

    match next {
        b'x' | b'X' => {
            // `\x` followed by one or two hex digits.
            let start = i + 2;
            let mut j = start;
            while j < len && j - start < 2 && bytes[j].is_ascii_hexdigit() {
                j += 1;
            }
            if j > start {
                if let Ok(val) = u8::from_str_radix(&text[start..j], 16) {
                    out.push(val as char);
                }
            } else {
                // Not an escape after all: keep it verbatim, including the
                // original letter case (`\X` must not become `\x`).
                out.push('\\');
                out.push(next as char);
            }
            j
        }
        b'u' => {
            let escape_start = i;
            let mut j = i + 2;
            if j >= len || bytes[j] != b'{' {
                // `\u` without `{` is ordinary text.
                out.push('\\');
                out.push('u');
                return j;
            }
            j += 1;
            let start = j;
            while j < len && bytes[j].is_ascii_hexdigit() {
                j += 1;
            }
            if j < len && bytes[j] == b'}' {
                let hex = &text[start..j];
                j += 1;
                let span = Span::new(span_base + escape_start as u32, span_base + j as u32);
                if hex.is_empty() {
                    errors.push(ParseError::Forbidden {
                        message: "Invalid UTF-8 codepoint escape sequence: empty code point"
                            .into(),
                        span,
                    });
                } else {
                    // `hex` holds only hex digits, so parsing fails only on
                    // overflow; report that like any other value that is not a
                    // valid `char` instead of dropping it silently.
                    match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
                        Some(c) => out.push(c),
                        None => errors.push(ParseError::Forbidden {
                            message:
                                "Invalid UTF-8 codepoint escape sequence: Codepoint too large"
                                    .into(),
                            span,
                        }),
                    }
                }
            } else {
                // Malformed: skip to the closing `}`, the end of the line, or
                // (optionally) a double quote, then report the whole range.
                while j < len
                    && bytes[j] != b'}'
                    && bytes[j] != b'\n'
                    && !(stop_at_dquote && bytes[j] == b'"')
                {
                    j += 1;
                }
                if j < len && bytes[j] == b'}' {
                    j += 1;
                }
                errors.push(ParseError::Forbidden {
                    message: "Invalid UTF-8 codepoint escape sequence".into(),
                    span: Span::new(span_base + escape_start as u32, span_base + j as u32),
                });
            }
            j
        }
        b'0'..=b'7' => {
            // One to three octal digits.
            let start = i + 1;
            let mut j = start;
            while j < len && j - start < 3 && matches!(bytes[j], b'0'..=b'7') {
                j += 1;
            }
            // Three digits can reach 0o777; PHP wraps the value to one byte
            // ("\400" === "\000"), so keep the low 8 bits rather than dropping
            // the escape.
            if let Ok(val) = u16::from_str_radix(&text[start..j], 8) {
                out.push((val & 0xFF) as u8 as char);
            }
            j
        }
        _ => {
            // Unknown escape: keep the backslash and the whole following
            // character, so a multi-byte character is never split.
            out.push('\\');
            let mut j = i + 2;
            while j < len && !text.is_char_boundary(j) {
                j += 1;
            }
            match text.get(i + 1..j) {
                Some(ch) => {
                    out.push_str(ch);
                    j
                }
                None => {
                    out.push(next as char);
                    i + 2
                }
            }
        }
    }
}
/// Reports whether `inner` contains anything that starts an interpolation:
/// `$` followed by an identifier-start byte or `{`, or `{` followed by `$`.
///
/// A backslash always hides the byte that follows it, so escaped `$` and `{`
/// are not counted.
pub fn has_interpolation(inner: &str) -> bool {
    let bytes = inner.as_bytes();
    let mut pos = 0;
    while let Some(&current) = bytes.get(pos) {
        let following = bytes.get(pos + 1).copied();
        match (current, following) {
            // Skip the backslash together with the byte it escapes.
            (b'\\', _) => pos += 2,
            (b'$', Some(b)) if b == b'{' || is_var_start(b) => return true,
            (b'{', Some(b'$')) => return true,
            _ => pos += 1,
        }
    }
    false
}
/// Returns `true` when `b` may begin a variable name: an ASCII letter, `_`,
/// or any byte at or above `0x80`.
fn is_var_start(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'_' | 0x80..=0xFF)
}
/// Returns `true` when `b` may continue a variable name: an ASCII letter or
/// digit, `_`, or any byte at or above `0x80`.
fn is_var_char(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | 0x80..=0xFF)
}
/// Builds the expression for the text between `[` and `]` in a simple
/// string-offset interpolation.
///
/// `idx_str` is the index text; `idx_offset` and `idx_end` are used both for
/// the resulting span and to slice the same text out of `source`.
///
/// * A decimal integer without leading zeros (optionally negative, but not
///   `-0`) that fits in `i64` becomes `ExprKind::Int`.
/// * `$` followed by at least one more byte becomes `ExprKind::Variable`.
/// * Anything else becomes `ExprKind::String`.
fn parse_simple_index<'arena, 'src>(
    arena: &'arena bumpalo::Bump,
    source: &'src str,
    idx_str: &str,
    idx_offset: u32,
    idx_end: u32,
) -> Expr<'arena, 'src> {
    let span = Span::new(idx_offset, idx_end);

    // A leading `-` selects the negative-integer rule exclusively; the plain
    // integer rule is only tried when there is no sign.
    let int_value: Option<i64> = match idx_str.strip_prefix('-') {
        Some(magnitude) => {
            if is_php_interp_nonzero_int(magnitude) {
                magnitude.parse::<i64>().ok().map(|n| -n)
            } else {
                None
            }
        }
        None => {
            if is_php_interp_int(idx_str) {
                idx_str.parse::<i64>().ok()
            } else {
                None
            }
        }
    };

    let kind = if let Some(value) = int_value {
        ExprKind::Int(value)
    } else if idx_str.len() > 1 && idx_str.starts_with('$') {
        // Variable name without its `$`.
        let name_start = idx_offset as usize + 1;
        let name_end = idx_offset as usize + idx_str.len();
        ExprKind::Variable(NameStr::Src(&source[name_start..name_end]))
    } else {
        let key_start = idx_offset as usize;
        let key_end = idx_end as usize;
        ExprKind::String(arena.alloc_str(&source[key_start..key_end]))
    };

    Expr { kind, span }
}
/// Returns `true` when `s` is `"0"` or a run of ASCII digits that starts with
/// `1`–`9` (no leading zeros, no sign, not empty).
fn is_php_interp_int(s: &str) -> bool {
    match s.as_bytes().split_first() {
        None => false,
        // A leading zero is only accepted as the whole string.
        Some((b'0', tail)) => tail.is_empty(),
        Some((lead, tail)) => {
            (b'1'..=b'9').contains(lead) && tail.iter().all(u8::is_ascii_digit)
        }
    }
}
/// Returns `true` when `s` is a run of ASCII digits that starts with `1`–`9`;
/// `"0"`, leading zeros and the empty string are rejected.
fn is_php_interp_nonzero_int(s: &str) -> bool {
    let Some((lead, tail)) = s.as_bytes().split_first() else {
        return false;
    };
    (b'1'..=b'9').contains(lead) && tail.iter().all(u8::is_ascii_digit)
}
fn parse_complex_interpolation<'arena, 'src>(
arena: &'arena bumpalo::Bump,
source: &'src str,
offset: u32,
end: u32,
version: PhpVersion,
) -> Expr<'arena, 'src> {
let mut sub = crate::parser::Parser::new_at(arena, source, offset as usize, version);
let expr = crate::expr::parse_expr(&mut sub);
if matches!(expr.kind, ExprKind::Error) {
Expr {
kind: ExprKind::Error,
span: Span::new(offset, end),
}
} else {
expr
}
}
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use super::*;

    /// Parses `src` with `crate::parse` and fails the test if any diagnostics
    /// were produced.
    fn assert_parses_without_errors(src: &str) {
        let arena = bumpalo::Bump::new();
        let result = crate::parse(&arena, src);
        assert!(result.errors.is_empty(), "{:?}", result.errors);
    }

    // Heredoc with an indented closing label and a simple `$name`.
    #[test]
    fn indented_heredoc_simple_var() {
        assert_parses_without_errors("<?php\n$x = <<<END\n Hello $name!\n END;\n");
    }

    // Heredoc with an indented closing label and a `{$...}` expression.
    #[test]
    fn indented_heredoc_complex_interpolation() {
        assert_parses_without_errors("<?php\n$x = <<<END\n Hello {$obj->name}!\n END;\n");
    }

    // Interpolations on more than one indented line.
    #[test]
    fn indented_heredoc_multiline_interpolation() {
        assert_parses_without_errors(
            "<?php\n$x = <<<END\n Line 1 {$a}\n Line 2 {$b}\n END;\n",
        );
    }

    // Nowdoc (quoted label) with an indented closing label.
    #[test]
    fn indented_nowdoc() {
        assert_parses_without_errors("<?php\n$x = <<<'END'\n Hello world!\n END;\n");
    }
}