use std::collections::HashMap;
use std::mem::ManuallyDrop;
use std::sync::{LazyLock, Mutex};
use php_ast::{Program, Span, TypeHint, TypeHintKind};
use tower_lsp::lsp_types::{Position, Range};
/// Two-level string-keyed lookup table: outer key -> (inner key -> value).
///
/// NOTE(review): going by the name this is presumably
/// class name -> (method name -> return type). Nothing in the visible part of
/// this file builds or reads it, so confirm against the callers.
pub type MethodReturnsMap = HashMap<String, HashMap<String, String>>;
/// Maximum number of idle arenas retained by the pool. `BumpPool::give` only
/// stores a returned arena while the pool holds fewer than this many.
const POOL_CAP: usize = 8;
/// A mutex-protected stack of reusable `bumpalo::Bump` arenas.
struct BumpPool {
// Arenas are kept boxed on purpose (hence the lint allow): `ParsedDoc::parse`
// takes a reference to the `Bump` behind the box and then moves the box
// itself, so the `Bump` must stay at a fixed heap address.
#[allow(clippy::vec_box)]
pool: Mutex<Vec<Box<bumpalo::Bump>>>,
}
impl BumpPool {
    /// Locks the list of idle arenas, recovering the guard if the mutex is
    /// poisoned.
    ///
    /// The protected `Vec` has no invariant that a panicking thread could
    /// leave half-updated, so continuing after poisoning is sound. It also
    /// matters for `give`, which runs from a destructor: panicking there
    /// while another panic is unwinding would abort the process.
    #[allow(clippy::vec_box)] // mirrors the field type; see `BumpPool::pool`
    fn idle(&self) -> std::sync::MutexGuard<'_, Vec<Box<bumpalo::Bump>>> {
        self.pool
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
    }

    /// Hands out an arena: a recycled one when the pool has any, otherwise a
    /// freshly allocated one.
    fn take(&self) -> Box<bumpalo::Bump> {
        // The guard is a temporary of this statement, so the lock is released
        // before a new arena is (possibly) allocated below.
        let recycled = self.idle().pop();
        recycled.unwrap_or_else(|| Box::new(bumpalo::Bump::new()))
    }

    /// Resets `arena` and stores it for reuse, unless the pool already holds
    /// `POOL_CAP` arenas, in which case the arena is simply dropped.
    fn give(&self, mut arena: Box<bumpalo::Bump>) {
        // Reset before taking the lock so other threads are not blocked on it.
        arena.reset();
        let mut idle = self.idle();
        if idle.len() < POOL_CAP {
            idle.push(arena);
        }
    }
}
/// Process-wide arena pool, created empty on first use.
static BUMP_POOL: LazyLock<BumpPool> = LazyLock::new(|| {
    let pool = Mutex::new(Vec::new());
    BumpPool { pool }
});
/// Owns an arena and gives it back to `BUMP_POOL` when dropped.
///
/// The `Option` lets `Drop::drop` move the box out through `&mut self`.
struct ArenaGuard(Option<Box<bumpalo::Bump>>);
impl Drop for ArenaGuard {
    /// Returns the held arena, if there still is one, to the global pool.
    fn drop(&mut self) {
        let Some(arena) = self.0.take() else {
            return;
        };
        BUMP_POOL.give(arena);
    }
}
/// A parsed PHP source file: the AST bundled with the source text and the
/// arena it was built from.
///
/// `program` is typed with `'static` lifetimes, but `ParsedDoc::parse` builds
/// it from references into `_source` and `_arena` whose lifetimes were
/// extended with `transmute`. It is therefore only valid while those two
/// fields are alive, which is why `program` is dropped first (explicitly, in
/// `Drop`).
pub struct ParsedDoc {
// Wrapped in `ManuallyDrop` so `Drop::drop` can destroy it before any other
// field is dropped.
program: ManuallyDrop<Box<Program<'static, 'static>>>,
/// Errors returned by the parser alongside the program.
pub errors: Vec<php_rs_parser::diagnostics::ParseError>,
// NOTE(review): boxed presumably so the text sits behind a stable pointer;
// `parse` takes its `&str` before this box is moved into the struct.
#[allow(clippy::box_collection)]
_source: Box<String>,
// Byte offset at which each line of `_source` starts (entry 0 is always 0).
line_starts: Vec<u32>,
// Hands the arena back to the pool on drop. Fields drop in declaration
// order, so this happens after everything above has been dropped.
_arena: ArenaGuard,
}
impl Drop for ParsedDoc {
/// Destroys the AST first, while the source text and the arena it was
/// parsed from are still alive; the remaining fields are dropped afterwards
/// in declaration order.
fn drop(&mut self) {
// SAFETY: `drop` runs at most once and `self.program` is not touched again
// afterwards; `ManuallyDrop` prevents the automatic field drop from
// dropping it a second time.
unsafe { ManuallyDrop::drop(&mut self.program) };
}
}
// SAFETY / NOTE(review): these impls override auto-trait inference for
// `ParsedDoc`. The API visible in this file only hands out shared references
// to the program, the source text and the line table, and nothing here
// allocates from the arena after `parse` returns. Whether that is sufficient
// also depends on `Program` and `ParseError` containing no thread-unsafe
// interior mutability — confirm against `php_ast` / `php_rs_parser`.
unsafe impl Send for ParsedDoc {}
unsafe impl Sync for ParsedDoc {}
impl ParsedDoc {
/// Parses `source` and returns a document that owns the text, the resulting
/// AST, the parse errors and the arena handed to the parser.
///
/// The arena is taken from the global `BUMP_POOL` and goes back to it when
/// the document is dropped.
pub fn parse(source: String) -> Self {
let source_box = Box::new(source);
let arena_box = BUMP_POOL.take();
// SAFETY: the `&str` points into the `String`'s heap buffer. That buffer does
// not move when `source_box` is moved into the returned struct, is never
// mutated afterwards (the field is private and only read), and is freed only
// after `program` has been dropped (see `Drop for ParsedDoc`). The `'static`
// lifetime is not exposed by the accessors in this impl: `program()` and
// `source()` return borrows tied to `&self`.
let src_ref: &'static str =
unsafe { std::mem::transmute::<&str, &'static str>(source_box.as_str()) };
// SAFETY: same argument for the arena. The `Bump` lives behind a `Box`, so its
// address is stable while `arena_box` is moved into `ArenaGuard`, and the
// guard is the last field of `ParsedDoc`, so it is released only after
// `program` is gone.
let arena_ref: &'static bumpalo::Bump = unsafe {
std::mem::transmute::<&bumpalo::Bump, &'static bumpalo::Bump>(arena_box.as_ref())
};
let result = php_rs_parser::parse(arena_ref, src_ref);
let line_starts = build_line_starts(src_ref);
ParsedDoc {
program: ManuallyDrop::new(Box::new(result.program)),
errors: result.errors,
_source: source_box,
line_starts,
_arena: ArenaGuard(Some(arena_box)),
}
}
/// Returns the parsed program, with its lifetimes narrowed from the internal
/// `'static` to the borrow of `self`.
#[inline]
pub fn program(&self) -> &Program<'_, '_> {
&self.program
}
/// Returns the source text this document was parsed from.
#[inline]
pub fn source(&self) -> &str {
&self._source
}
/// Returns the byte offset at which each line of the source starts; the
/// first entry is always 0.
pub fn line_starts(&self) -> &[u32] {
&self.line_starts
}
/// Returns a borrowed `SourceView` over the source text and line table.
pub fn view(&self) -> SourceView<'_> {
SourceView {
source: self.source(),
line_starts: self.line_starts(),
}
}
}
impl Default for ParsedDoc {
fn default() -> Self {
ParsedDoc::parse(String::new())
}
}
/// Builds the line table for `source`: the byte offset at which each line
/// begins. Line 0 always starts at offset 0, and every `\n` byte opens a new
/// line immediately after it (so a `\r\n` pair counts once).
fn build_line_starts(source: &str) -> Vec<u32> {
    let after_newlines = source
        .bytes()
        .enumerate()
        .filter(|&(_, byte)| byte == b'\n')
        .map(|(idx, _)| idx as u32 + 1);
    std::iter::once(0u32).chain(after_newlines).collect()
}
/// A cheap, copyable borrowed view of a source text together with its line
/// table, used to convert between byte offsets and LSP positions.
#[derive(Copy, Clone)]
pub struct SourceView<'a> {
// The full source text.
source: &'a str,
// Byte offset at which each line of `source` starts.
line_starts: &'a [u32],
}
impl<'a> SourceView<'a> {
    /// Returns the underlying source text.
    #[inline]
    pub fn source(self) -> &'a str {
        self.source
    }

    /// Converts a byte offset into a line / UTF-16 column position.
    pub fn position_of(self, offset: u32) -> Position {
        offset_to_position(self.source, self.line_starts, offset)
    }

    /// Returns the byte offset at which each line starts.
    #[inline]
    pub fn line_starts(self) -> &'a [u32] {
        self.line_starts
    }

    /// Returns the zero-based index of the line containing `offset`, i.e. the
    /// last line whose start is `<= offset` (0 when there is none).
    #[inline]
    pub fn line_of(self, offset: u32) -> u32 {
        match self.line_starts.partition_point(|&s| s <= offset) {
            0 => 0,
            i => (i - 1) as u32,
        }
    }

    /// Converts a line / UTF-16 column position into a byte offset.
    ///
    /// * A column past the end of the line clamps to the end of the line's
    ///   content (before the `\r\n` / `\n` terminator).
    /// * A column that falls inside a surrogate pair resolves to the offset
    ///   just after that character.
    /// * A line past the end of the document clamps to the end of the text.
    pub fn byte_of_position(self, pos: Position) -> u32 {
        let line_idx = pos.line as usize;
        let Some(&line_start) = self.line_starts.get(line_idx) else {
            // Falling back to line 0 here would treat the whole file as one
            // line and return an offset near the top of the document.
            return self.source.len() as u32;
        };
        let line_start = line_start as usize;
        // The next line starts right after this line's '\n'; step back over it.
        let line_end = self
            .line_starts
            .get(line_idx + 1)
            .map(|&next| (next as usize).saturating_sub(1))
            .unwrap_or(self.source.len());
        let raw = &self.source[line_start..line_end.min(self.source.len())];
        // Do not count the '\r' of a CRLF terminator as a column.
        let line = raw.strip_suffix('\r').unwrap_or(raw);

        let mut col_utf16: u32 = 0;
        let mut byte_in_line: usize = 0;
        for ch in line.chars() {
            if col_utf16 >= pos.character {
                break;
            }
            col_utf16 += ch.len_utf16() as u32;
            byte_in_line += ch.len_utf8();
        }
        (line_start + byte_in_line) as u32
    }

    /// Converts a byte span into a position range.
    pub fn range_of(self, span: Span) -> Range {
        Range {
            start: self.position_of(span.start),
            end: self.position_of(span.end),
        }
    }

    /// Returns the range covered by `name`, locating it in the source with
    /// `str_offset`.
    pub fn name_range(self, name: &str) -> Range {
        let start = str_offset(self.source, name);
        Range {
            start: self.position_of(start),
            end: self.position_of(start + name.len() as u32),
        }
    }
}
/// Converts a byte `offset` into `source` to a position with a zero-based
/// line and a UTF-16 column.
///
/// `line_starts` is the table of byte offsets at which each line begins.
/// Offsets past the end of the text clamp to the end, and an offset that falls
/// inside a multi-byte character is treated as pointing at the start of that
/// character. The `\r` of a CRLF terminator is never counted as a column.
pub fn offset_to_position(source: &str, line_starts: &[u32], offset: u32) -> Position {
    // Clamp to the text, then back up to a char boundary: slicing a `str` in
    // the middle of a UTF-8 sequence panics. Offset 0 is always a boundary,
    // so the loop terminates.
    let mut end = (offset as usize).min(source.len());
    while !source.is_char_boundary(end) {
        end -= 1;
    }
    let line = match line_starts.partition_point(|&s| s <= offset) {
        0 => 0u32,
        i => (i - 1) as u32,
    };
    let line_start = line_starts.get(line as usize).copied().unwrap_or(0) as usize;
    // Checked slice: a line table that does not match `source` yields
    // column 0 instead of a panic.
    let segment = source.get(line_start..end).unwrap_or("");
    let segment = segment.strip_suffix('\r').unwrap_or(segment);
    let character = segment.chars().map(|c| c.len_utf16() as u32).sum::<u32>();
    Position { line, character }
}
/// Converts a byte `span` into a range by translating both of its endpoints
/// with `offset_to_position`.
pub fn span_to_range(source: &str, line_starts: &[u32], span: Span) -> Range {
    let to_position = |offset: u32| offset_to_position(source, line_starts, offset);
    let start = to_position(span.start);
    let end = to_position(span.end);
    Range { start, end }
}
/// Returns the byte offset of `substr` within `source`.
///
/// When `substr` is a slice borrowed from `source` itself, the offset comes
/// from pointer arithmetic, which identifies the exact occurrence. Otherwise
/// the first textual match is used, and 0 is returned when there is none.
pub fn str_offset(source: &str, substr: &str) -> u32 {
    let base = source.as_ptr() as usize;
    let candidate = substr.as_ptr() as usize;
    match candidate.checked_sub(base) {
        // `substr` starts at or after `source` and ends within it.
        Some(delta) if delta <= source.len() && substr.len() <= source.len() - delta => {
            delta as u32
        }
        _ => source.find(substr).map_or(0, |idx| idx as u32),
    }
}
/// Returns the range covered by `name` in `source`: `name` is located with
/// `str_offset` and the range spans `name.len()` bytes from there.
pub fn name_range(source: &str, line_starts: &[u32], name: &str) -> Range {
    let begin = str_offset(source, name);
    let finish = begin + name.len() as u32;
    Range {
        start: offset_to_position(source, line_starts, begin),
        end: offset_to_position(source, line_starts, finish),
    }
}
/// Renders a type hint as a string by formatting its `kind`.
pub fn format_type_hint(hint: &TypeHint<'_, '_>) -> String {
    let kind = &hint.kind;
    fmt_kind(kind)
}
/// Renders one type-hint kind: names and keywords as-is, nullable types with a
/// leading `?`, unions joined by `|` and intersections joined by `&`.
fn fmt_kind(kind: &TypeHintKind<'_, '_>) -> String {
    match kind {
        TypeHintKind::Named(name) => name.to_string_repr().to_string(),
        TypeHintKind::Keyword(builtin, _) => builtin.as_str().to_string(),
        TypeHintKind::Nullable(inner) => {
            let mut rendered = String::from("?");
            rendered.push_str(&format_type_hint(inner));
            rendered
        }
        TypeHintKind::Union(types) => {
            let members: Vec<String> = types.iter().map(format_type_hint).collect();
            members.join("|")
        }
        TypeHintKind::Intersection(types) => {
            let members: Vec<String> = types.iter().map(format_type_hint).collect();
            members.join("&")
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` and converts `offset` using the line table the document
    /// built for it.
    fn position_at(src: &str, offset: u32) -> Position {
        let doc = ParsedDoc::parse(src.to_string());
        offset_to_position(src, doc.line_starts(), offset)
    }

    /// Shorthand for building the expected position.
    fn pos(line: u32, character: u32) -> Position {
        Position { line, character }
    }

    #[test]
    fn parses_empty_source() {
        let doc = ParsedDoc::parse(String::from("<?php"));
        assert!(doc.errors.is_empty());
        assert!(doc.program().stmts.is_empty());
    }

    #[test]
    fn parses_function() {
        let doc = ParsedDoc::parse(String::from("<?php\nfunction foo() {}"));
        assert_eq!(doc.program().stmts.len(), 1);
    }

    #[test]
    fn offset_to_position_first_line() {
        assert_eq!(position_at("<?php\nfoo", 0), pos(0, 0));
    }

    #[test]
    fn offset_to_position_second_line() {
        // Offset 6 is the first byte after the '\n'.
        assert_eq!(position_at("<?php\nfoo", 6), pos(1, 0));
    }

    #[test]
    fn offset_to_position_multibyte_utf16() {
        // 'a' is 1 byte and the emoji 4 bytes, so offset 5 is 'b'. In UTF-16
        // the emoji takes two code units: column 1 + 2 = 3.
        assert_eq!(position_at("a\u{1F600}b", 5), pos(0, 3));
    }

    #[test]
    fn offset_to_position_crlf_start_of_line() {
        // Offset 5 is 'b', the first byte after "\r\n".
        assert_eq!(position_at("foo\r\nbar", 5), pos(1, 0));
    }

    #[test]
    fn offset_to_position_crlf_does_not_count_cr_in_column() {
        // Offset 3 is the '\r' itself, i.e. the end of "foo".
        assert_eq!(position_at("foo\r\nbar", 3), pos(0, 3));
    }

    #[test]
    fn offset_to_position_crlf_multiline() {
        let src = "a\r\nb\r\nc";
        // Offset 6 is 'c', offset 3 is 'b'.
        assert_eq!(position_at(src, 6), pos(2, 0));
        assert_eq!(position_at(src, 3), pos(1, 0));
    }

    #[test]
    fn str_offset_finds_substr() {
        let src = "<?php\nfunction foo() {}";
        // Bytes 15..18 are "foo", borrowed from `src` itself.
        let name = &src[15..18];
        assert_eq!(str_offset(src, name), 15);
    }

    #[test]
    fn str_offset_content_fallback_for_different_allocation() {
        let owned = String::from("foo");
        assert_eq!(str_offset("<?php foo", &owned), 6);
    }

    #[test]
    fn str_offset_unrelated_content_returns_zero() {
        let owned = String::from("bar");
        assert_eq!(str_offset("<?php foo", &owned), 0);
    }
}