1use std::collections::HashMap;
3use std::mem::ManuallyDrop;
4use std::sync::{Arc, LazyLock, Mutex};
5
6use php_ast::{Program, Span, TypeHint, TypeHintKind};
7use tower_lsp::lsp_types::{Position, Range};
8
/// Two-level string-keyed lookup table: outer key → inner key → value.
///
/// NOTE(review): judging by the name, this presumably maps a class name to its
/// methods and each method to its return type — confirm against the call sites.
pub type MethodReturnsMap = HashMap<String, HashMap<String, String>>;
12
/// Maximum number of idle arenas the pool retains; `BumpPool::give` drops any
/// arena handed back while the pool already holds this many.
const POOL_CAP: usize = 8;
16
/// Mutex-protected free list of reusable `bumpalo::Bump` arenas.
struct BumpPool {
    // The arenas are boxed on purpose: `ParsedDoc::parse` keeps a reference to
    // the `Bump` while the owning `Box` is moved into the document, so the
    // arena itself has to stay at a fixed heap address.
    #[allow(clippy::vec_box)]
    pool: Mutex<Vec<Box<bumpalo::Bump>>>,
}
24
25impl BumpPool {
26 fn take(&self) -> Box<bumpalo::Bump> {
27 self.pool
28 .lock()
29 .unwrap()
30 .pop()
31 .unwrap_or_else(|| Box::new(bumpalo::Bump::new()))
32 }
33
34 fn give(&self, mut arena: Box<bumpalo::Bump>) {
35 arena.reset();
36 let mut p = self.pool.lock().unwrap();
37 if p.len() < POOL_CAP {
38 p.push(arena);
39 }
40 }
41}
42
43static BUMP_POOL: LazyLock<BumpPool> = LazyLock::new(|| BumpPool {
44 pool: Mutex::new(Vec::new()),
45});
46
/// Owns an arena taken from the pool and hands it back to `BUMP_POOL` on drop.
///
/// The `Option` lets the `Drop` impl move the box out of `&mut self`.
struct ArenaGuard(Option<Box<bumpalo::Bump>>);
51
52impl Drop for ArenaGuard {
53 fn drop(&mut self) {
54 if let Some(arena) = self.0.take() {
55 BUMP_POOL.give(arena);
56 }
57 }
58}
59
/// A parsed document bundled with the storage its AST was built from.
///
/// `parse` builds `program` from lifetime-extended references to `_source` and
/// to the arena held by `_arena`, so both are stored next to it and the `Drop`
/// impl releases `program` before any other field is dropped.
pub struct ParsedDoc {
    // Lifetimes are erased to `'static` (see `parse`). `ManuallyDrop` lets the
    // `Drop` impl decide exactly when the AST is destroyed.
    program: ManuallyDrop<Box<Program<'static, 'static>>>,
    /// Errors reported by the parser for this source.
    pub errors: Vec<php_rs_parser::diagnostics::ParseError>,
    // The text handed to the parser; held so the allocation stays alive.
    _source: Arc<str>,
    // Byte offset at which each line starts (entry 0 is always 0).
    line_starts: Vec<u32>,
    // The arena handed to the parser; goes back to the pool when this drops.
    _arena: ArenaGuard,
}
83
impl Drop for ParsedDoc {
    fn drop(&mut self) {
        // SAFETY: `program` is dropped exactly once, here, and is not touched
        // afterwards. This body runs before the remaining fields are dropped,
        // so the source text and the arena are both still alive at this point.
        unsafe { ManuallyDrop::drop(&mut self.program) };
    }
}
92
// NOTE(review): `Send`/`Sync` are asserted by hand instead of being derived.
// Presumably the AST or arena types are not automatically `Send`/`Sync`, and
// soundness relies on the arena never being mutated through a shared
// `ParsedDoc` — confirm against `php_ast` and `bumpalo` before relying on it.
unsafe impl Send for ParsedDoc {}
unsafe impl Sync for ParsedDoc {}
96
97impl ParsedDoc {
98 pub fn parse(source: impl Into<Arc<str>>) -> Self {
102 let source: Arc<str> = source.into();
103 let arena_box = BUMP_POOL.take();
105
106 let src_ref: &'static str = unsafe { std::mem::transmute::<&str, &'static str>(&*source) };
112 let arena_ref: &'static bumpalo::Bump = unsafe {
113 std::mem::transmute::<&bumpalo::Bump, &'static bumpalo::Bump>(arena_box.as_ref())
114 };
115
116 let result = php_rs_parser::parse(arena_ref, src_ref);
117
118 let line_starts = build_line_starts(src_ref);
119
120 ParsedDoc {
121 program: ManuallyDrop::new(Box::new(result.program)),
122 errors: result.errors,
123 _source: source,
124 line_starts,
125 _arena: ArenaGuard(Some(arena_box)),
126 }
127 }
128
129 #[inline]
134 pub fn program(&self) -> &Program<'_, '_> {
135 &self.program
136 }
137
138 #[inline]
140 pub fn source(&self) -> &str {
141 &self._source
142 }
143
144 #[inline]
150 pub fn source_arc(&self) -> Arc<str> {
151 self._source.clone()
152 }
153
154 pub fn line_starts(&self) -> &[u32] {
157 &self.line_starts
158 }
159
160 pub fn view(&self) -> SourceView<'_> {
162 SourceView {
163 source: self.source(),
164 line_starts: self.line_starts(),
165 }
166 }
167}
168
169impl Default for ParsedDoc {
170 fn default() -> Self {
171 ParsedDoc::parse("")
172 }
173}
174
/// Builds the table of byte offsets at which each line of `source` begins.
///
/// The first entry is always 0; every `\n` adds the offset of the byte that
/// follows it. A `\r` is not treated as a line break on its own.
fn build_line_starts(source: &str) -> Vec<u32> {
    let after_newlines = source
        .bytes()
        .enumerate()
        .filter(|&(_, byte)| byte == b'\n')
        .map(|(idx, _)| idx as u32 + 1);
    std::iter::once(0u32).chain(after_newlines).collect()
}
188
/// Copyable borrow of a document's text together with its line-start table.
#[derive(Copy, Clone)]
pub struct SourceView<'a> {
    // The full source text.
    source: &'a str,
    // Byte offset at which each line of `source` begins.
    line_starts: &'a [u32],
}
196
197impl<'a> SourceView<'a> {
198 #[inline]
199 pub fn source(self) -> &'a str {
200 self.source
201 }
202
203 pub fn position_of(self, offset: u32) -> Position {
204 offset_to_position(self.source, self.line_starts, offset)
205 }
206
207 #[inline]
208 pub fn line_starts(self) -> &'a [u32] {
209 self.line_starts
210 }
211
212 #[inline]
215 pub fn line_of(self, offset: u32) -> u32 {
216 match self.line_starts.partition_point(|&s| s <= offset) {
217 0 => 0,
218 i => (i - 1) as u32,
219 }
220 }
221
222 pub fn byte_of_position(self, pos: Position) -> u32 {
227 let line_idx = pos.line as usize;
228 let line_start = self.line_starts.get(line_idx).copied().unwrap_or(0) as usize;
229 let line_end = self
230 .line_starts
231 .get(line_idx + 1)
232 .map(|&s| (s as usize).saturating_sub(1))
233 .unwrap_or(self.source.len());
234 let raw = &self.source[line_start..line_end.min(self.source.len())];
235 let line = raw.strip_suffix('\r').unwrap_or(raw);
236 let mut col_utf16: u32 = 0;
237 let mut byte_in_line: usize = 0;
238 for ch in line.chars() {
239 if col_utf16 >= pos.character {
240 break;
241 }
242 col_utf16 += ch.len_utf16() as u32;
243 byte_in_line += ch.len_utf8();
244 }
245 (line_start + byte_in_line) as u32
246 }
247
248 pub fn range_of(self, span: Span) -> Range {
249 Range {
250 start: self.position_of(span.start),
251 end: self.position_of(span.end),
252 }
253 }
254
255 pub fn name_range(self, name: &str) -> Range {
256 let start = str_offset(self.source, name).unwrap_or(0);
257 Range {
258 start: self.position_of(start),
259 end: self.position_of(start + name.len() as u32),
260 }
261 }
262
263 pub fn name_range_in_span(self, name: &str, span: php_ast::Span) -> Range {
269 let s = span.start as usize;
270 let e = (span.end as usize).min(self.source.len());
271 let start = self
272 .source
273 .get(s..e)
274 .and_then(|slice| slice.find(name))
275 .map(|off| span.start + off as u32)
276 .unwrap_or_else(|| str_offset(self.source, name).unwrap_or(0));
277 Range {
278 start: self.position_of(start),
279 end: self.position_of(start + name.len() as u32),
280 }
281 }
282}
283
284pub fn offset_to_position(source: &str, line_starts: &[u32], offset: u32) -> Position {
291 let offset_usize = (offset as usize).min(source.len());
292 let line = match line_starts.partition_point(|&s| s <= offset) {
294 0 => 0u32,
295 i => (i - 1) as u32,
296 };
297 let line_start = line_starts.get(line as usize).copied().unwrap_or(0) as usize;
298 let segment = &source[line_start..offset_usize];
299 let segment = segment.strip_suffix('\r').unwrap_or(segment);
301 let character = segment.chars().map(|c| c.len_utf16() as u32).sum::<u32>();
302 Position { line, character }
303}
304
305pub fn span_to_range(source: &str, line_starts: &[u32], span: Span) -> Range {
307 Range {
308 start: offset_to_position(source, line_starts, span.start),
309 end: offset_to_position(source, line_starts, span.end),
310 }
311}
312
/// Returns the byte offset of `substr` within `source`.
///
/// Two strategies are tried in order:
///
/// 1. If `substr` is physically a sub-slice of `source` (its bytes lie inside
///    `source`'s allocation), the offset is computed from the pointers.
/// 2. Otherwise `source` is searched for the first occurrence of `substr`
///    that is not directly preceded or followed by an alphanumeric character
///    or `_`, so `foo` does not match inside `foobar`.
///
/// Returns `None` when no such occurrence exists.
pub fn str_offset(source: &str, substr: &str) -> Option<u32> {
    let src_ptr = source.as_ptr() as usize;
    let sub_ptr = substr.as_ptr() as usize;
    if sub_ptr >= src_ptr && sub_ptr + substr.len() <= src_ptr + source.len() {
        return Some((sub_ptr - src_ptr) as u32);
    }

    let is_word_char = |c: char| c.is_alphanumeric() || c == '_';

    let mut search_pos = 0;
    while let Some(offset) = source
        .get(search_pos..)
        .and_then(|rest| rest.find(substr))
    {
        let abs_offset = search_pos + offset;
        let end_pos = abs_offset + substr.len();

        // A missing neighbour (start or end of text) counts as a boundary.
        let is_start_boundary = !source[..abs_offset]
            .chars()
            .next_back()
            .is_some_and(is_word_char);
        let is_end_boundary = !source[end_pos..].chars().next().is_some_and(is_word_char);

        if is_start_boundary && is_end_boundary {
            return Some(abs_offset as u32);
        }

        // Resume after one whole character. Stepping a single byte could land
        // inside a multi-byte character and make the next slice panic; with
        // nothing left to step over (empty needle at the end) the search ends.
        match source[abs_offset..].chars().next() {
            Some(c) => search_pos = abs_offset + c.len_utf8(),
            None => break,
        }
    }
    None
}
357
358pub fn name_range(source: &str, line_starts: &[u32], name: &str) -> Option<Range> {
360 let start = str_offset(source, name)?;
361 Some(Range {
362 start: offset_to_position(source, line_starts, start),
363 end: offset_to_position(source, line_starts, start + name.len() as u32),
364 })
365}
366
367pub fn str_offset_in_range(source: &str, span: Span, name: &str) -> Option<u32> {
370 let span_start = span.start as usize;
371 let span_end = span.end as usize;
372 if span_end > source.len() {
373 return None;
374 }
375 let span_text = &source[span_start..span_end];
376 let offset = str_offset(span_text, name)?;
377 Some(span_start as u32 + offset)
378}
379
380pub fn format_type_hint(hint: &TypeHint<'_, '_>) -> String {
384 fmt_kind(&hint.kind)
385}
386
387fn fmt_kind(kind: &TypeHintKind<'_, '_>) -> String {
388 match kind {
389 TypeHintKind::Named(name) => name.to_string_repr().to_string(),
390 TypeHintKind::Keyword(builtin, _) => builtin.as_str().to_string(),
391 TypeHintKind::Nullable(inner) => format!("?{}", format_type_hint(inner)),
392 TypeHintKind::Union(types) => types
393 .iter()
394 .map(format_type_hint)
395 .collect::<Vec<_>>()
396 .join("|"),
397 TypeHintKind::Intersection(types) => types
398 .iter()
399 .map(format_type_hint)
400 .collect::<Vec<_>>()
401 .join("&"),
402 }
403}
404
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the expected position.
    fn pos(line: u32, character: u32) -> Position {
        Position { line, character }
    }

    /// Parses `src` and converts `offset` using the document's own line table.
    fn position_at(src: &str, offset: u32) -> Position {
        let doc = ParsedDoc::parse(src.to_string());
        offset_to_position(src, doc.line_starts(), offset)
    }

    #[test]
    fn parses_empty_source() {
        let doc = ParsedDoc::parse(String::from("<?php"));
        assert!(doc.errors.is_empty());
        assert!(doc.program().stmts.is_empty());
    }

    #[test]
    fn parses_function() {
        let doc = ParsedDoc::parse(String::from("<?php\nfunction foo() {}"));
        assert_eq!(doc.program().stmts.len(), 1);
    }

    #[test]
    fn offset_to_position_first_line() {
        assert_eq!(position_at("<?php\nfoo", 0), pos(0, 0));
    }

    #[test]
    fn offset_to_position_second_line() {
        // Byte 6 is the first byte after the newline.
        assert_eq!(position_at("<?php\nfoo", 6), pos(1, 0));
    }

    #[test]
    fn offset_to_position_multibyte_utf16() {
        // 'a' is 1 UTF-16 unit, U+1F600 is 4 bytes / 2 units, so 'b' at
        // byte 5 sits at column 3.
        assert_eq!(position_at("a\u{1F600}b", 5), pos(0, 3));
    }

    #[test]
    fn offset_to_position_crlf_start_of_line() {
        // Byte 5 is the 'b' that follows "\r\n".
        assert_eq!(position_at("foo\r\nbar", 5), pos(1, 0));
    }

    #[test]
    fn offset_to_position_crlf_does_not_count_cr_in_column() {
        // Byte 3 is the '\r'; the column equals the visible line length.
        assert_eq!(position_at("foo\r\nbar", 3), pos(0, 3));
    }

    #[test]
    fn offset_to_position_crlf_multiline() {
        let src = "a\r\nb\r\nc";
        assert_eq!(position_at(src, 6), pos(2, 0));
        assert_eq!(position_at(src, 3), pos(1, 0));
    }

    #[test]
    fn str_offset_finds_substr() {
        let src = "<?php\nfunction foo() {}";
        // A true sub-slice of `src`, so the pointer fast path applies.
        let name = &src[15..18];
        assert_eq!(str_offset(src, name), Some(15));
    }

    #[test]
    fn str_offset_content_fallback_for_different_allocation() {
        // A separately allocated needle forces the content search.
        let owned = String::from("foo");
        assert_eq!(str_offset("<?php foo", &owned), Some(6));
    }

    #[test]
    fn str_offset_unrelated_content_returns_none() {
        let owned = String::from("bar");
        assert_eq!(str_offset("<?php foo", &owned), None);
    }
}