1use std::mem::ManuallyDrop;
3use std::sync::{Arc, LazyLock, Mutex};
4
5use php_ast::{Program, Span, TypeHint, TypeHintKind};
6use tower_lsp::lsp_types::{Position, Range};
7
8const POOL_CAP: usize = 8;
11
12struct BumpPool {
13 #[allow(clippy::vec_box)]
17 pool: Mutex<Vec<Box<bumpalo::Bump>>>,
18}
19
20impl BumpPool {
21 fn take(&self) -> Box<bumpalo::Bump> {
22 self.pool
23 .lock()
24 .unwrap()
25 .pop()
26 .unwrap_or_else(|| Box::new(bumpalo::Bump::new()))
27 }
28
29 fn give(&self, mut arena: Box<bumpalo::Bump>) {
30 arena.reset();
31 let mut p = self.pool.lock().unwrap();
32 if p.len() < POOL_CAP {
33 p.push(arena);
34 }
35 }
36}
37
38static BUMP_POOL: LazyLock<BumpPool> = LazyLock::new(|| BumpPool {
39 pool: Mutex::new(Vec::new()),
40});
41
42struct ArenaGuard(Option<Box<bumpalo::Bump>>);
46
47impl Drop for ArenaGuard {
48 fn drop(&mut self) {
49 if let Some(arena) = self.0.take() {
50 BUMP_POOL.give(arena);
51 }
52 }
53}
54
55pub struct ParsedDoc {
72 program: ManuallyDrop<Box<Program<'static, 'static>>>,
73 pub errors: Vec<php_rs_parser::diagnostics::ParseError>,
74 _source: Arc<str>,
75 line_starts: Vec<u32>,
76 _arena: ArenaGuard,
77}
78
79impl Drop for ParsedDoc {
80 fn drop(&mut self) {
81 unsafe { ManuallyDrop::drop(&mut self.program) };
85 }
86}
87
88unsafe impl Send for ParsedDoc {}
90unsafe impl Sync for ParsedDoc {}
91
92impl ParsedDoc {
93 pub fn parse(source: impl Into<Arc<str>>) -> Self {
97 let source: Arc<str> = source.into();
98 let arena_box = BUMP_POOL.take();
100
101 let src_ref: &'static str = unsafe { std::mem::transmute::<&str, &'static str>(&*source) };
107 let arena_ref: &'static bumpalo::Bump = unsafe {
108 std::mem::transmute::<&bumpalo::Bump, &'static bumpalo::Bump>(arena_box.as_ref())
109 };
110
111 let result = php_rs_parser::parse_arena(arena_ref, src_ref);
112
113 let line_starts = build_line_starts(src_ref);
114
115 ParsedDoc {
116 program: ManuallyDrop::new(Box::new(result.program)),
117 errors: result.errors,
118 _source: source,
119 line_starts,
120 _arena: ArenaGuard(Some(arena_box)),
121 }
122 }
123
124 #[inline]
129 pub fn program(&self) -> &Program<'_, '_> {
130 &self.program
131 }
132
133 #[inline]
135 pub fn source(&self) -> &str {
136 &self._source
137 }
138
139 #[inline]
145 pub fn source_arc(&self) -> Arc<str> {
146 self._source.clone()
147 }
148
149 pub fn line_starts(&self) -> &[u32] {
152 &self.line_starts
153 }
154
155 pub fn view(&self) -> SourceView<'_> {
157 SourceView {
158 source: self.source(),
159 line_starts: self.line_starts(),
160 }
161 }
162}
163
164impl Default for ParsedDoc {
165 fn default() -> Self {
166 ParsedDoc::parse("")
167 }
168}
169
170fn build_line_starts(source: &str) -> Vec<u32> {
175 let mut starts = vec![0u32];
176 for (i, b) in source.bytes().enumerate() {
177 if b == b'\n' {
178 starts.push(i as u32 + 1);
179 }
180 }
181 starts
182}
183
184#[derive(Copy, Clone)]
187pub struct SourceView<'a> {
188 source: &'a str,
189 line_starts: &'a [u32],
190}
191
192impl<'a> SourceView<'a> {
193 #[inline]
194 pub fn source(self) -> &'a str {
195 self.source
196 }
197
198 pub fn position_of(self, offset: u32) -> Position {
199 offset_to_position(self.source, self.line_starts, offset)
200 }
201
202 #[inline]
203 pub fn line_starts(self) -> &'a [u32] {
204 self.line_starts
205 }
206
207 #[inline]
210 pub fn line_of(self, offset: u32) -> u32 {
211 match self.line_starts.partition_point(|&s| s <= offset) {
212 0 => 0,
213 i => (i - 1) as u32,
214 }
215 }
216
217 pub fn byte_of_position(self, pos: Position) -> u32 {
222 let line_idx = pos.line as usize;
223 let line_start = self.line_starts.get(line_idx).copied().unwrap_or(0) as usize;
224 let line_end = self
225 .line_starts
226 .get(line_idx + 1)
227 .map(|&s| (s as usize).saturating_sub(1))
228 .unwrap_or(self.source.len());
229 let raw = &self.source[line_start..line_end.min(self.source.len())];
230 let line = raw.strip_suffix('\r').unwrap_or(raw);
231 let mut col_utf16: u32 = 0;
232 let mut byte_in_line: usize = 0;
233 for ch in line.chars() {
234 if col_utf16 >= pos.character {
235 break;
236 }
237 col_utf16 += ch.len_utf16() as u32;
238 byte_in_line += ch.len_utf8();
239 }
240 (line_start + byte_in_line) as u32
241 }
242
243 pub fn range_of(self, span: Span) -> Range {
244 Range {
245 start: self.position_of(span.start),
246 end: self.position_of(span.end),
247 }
248 }
249
250 pub fn name_range(self, name: &str) -> Range {
251 let start = str_offset(self.source, name).unwrap_or(0);
252 Range {
253 start: self.position_of(start),
254 end: self.position_of(start + name.len() as u32),
255 }
256 }
257
258 pub fn name_range_in_span(self, name: &str, span: php_ast::Span) -> Range {
264 let s = span.start as usize;
265 let e = (span.end as usize).min(self.source.len());
266 let start = self
267 .source
268 .get(s..e)
269 .and_then(|slice| slice.find(name))
270 .map(|off| span.start + off as u32)
271 .unwrap_or_else(|| str_offset(self.source, name).unwrap_or(0));
272 Range {
273 start: self.position_of(start),
274 end: self.position_of(start + name.len() as u32),
275 }
276 }
277}
278
279pub fn offset_to_position(source: &str, line_starts: &[u32], offset: u32) -> Position {
286 let offset_usize = (offset as usize).min(source.len());
287 let line = match line_starts.partition_point(|&s| s <= offset) {
289 0 => 0u32,
290 i => (i - 1) as u32,
291 };
292 let line_start = line_starts.get(line as usize).copied().unwrap_or(0) as usize;
293 let segment = &source[line_start..offset_usize];
294 let segment = segment.strip_suffix('\r').unwrap_or(segment);
296 let character = segment.chars().map(|c| c.len_utf16() as u32).sum::<u32>();
297 Position { line, character }
298}
299
300pub fn span_to_range(source: &str, line_starts: &[u32], span: Span) -> Range {
302 Range {
303 start: offset_to_position(source, line_starts, span.start),
304 end: offset_to_position(source, line_starts, span.end),
305 }
306}
307
308pub fn str_offset(source: &str, substr: &str) -> Option<u32> {
319 let src_ptr = source.as_ptr() as usize;
320 let sub_ptr = substr.as_ptr() as usize;
321 if sub_ptr >= src_ptr && sub_ptr + substr.len() <= src_ptr + source.len() {
322 return Some((sub_ptr - src_ptr) as u32);
323 }
324 let mut search_pos = 0;
328 while let Some(offset) = source[search_pos..].find(substr) {
329 let abs_offset = search_pos + offset;
330 let is_start_boundary = abs_offset == 0
331 || !source[..abs_offset]
332 .chars()
333 .last()
334 .map(|c| c.is_alphanumeric() || c == '_')
335 .unwrap_or(false);
336 let end_pos = abs_offset + substr.len();
337 let is_end_boundary = end_pos >= source.len()
338 || !source[end_pos..]
339 .chars()
340 .next()
341 .map(|c| c.is_alphanumeric() || c == '_')
342 .unwrap_or(false);
343
344 if is_start_boundary && is_end_boundary {
345 return Some(abs_offset as u32);
346 }
347
348 search_pos = abs_offset + 1;
349 }
350 None
351}
352
353pub fn name_range(source: &str, line_starts: &[u32], name: &str) -> Option<Range> {
355 let start = str_offset(source, name)?;
356 Some(Range {
357 start: offset_to_position(source, line_starts, start),
358 end: offset_to_position(source, line_starts, start + name.len() as u32),
359 })
360}
361
362pub fn str_offset_in_range(source: &str, span: Span, name: &str) -> Option<u32> {
365 let span_start = span.start as usize;
366 let span_end = span.end as usize;
367 if span_end > source.len() {
368 return None;
369 }
370 let span_text = &source[span_start..span_end];
371 let offset = str_offset(span_text, name)?;
372 Some(span_start as u32 + offset)
373}
374
375pub fn format_type_hint(hint: &TypeHint<'_, '_>) -> String {
379 fmt_kind(&hint.kind)
380}
381
382fn fmt_kind(kind: &TypeHintKind<'_, '_>) -> String {
383 match kind {
384 TypeHintKind::Named(name) => name.to_string_repr().to_string(),
385 TypeHintKind::Keyword(builtin, _) => builtin.as_str().to_string(),
386 TypeHintKind::Nullable(inner) => format!("?{}", format_type_hint(inner)),
387 TypeHintKind::Union(types) => types
388 .iter()
389 .map(format_type_hint)
390 .collect::<Vec<_>>()
391 .join("|"),
392 TypeHintKind::Intersection(types) => types
393 .iter()
394 .map(format_type_hint)
395 .collect::<Vec<_>>()
396 .join("&"),
397 }
398}
399
400#[cfg(test)]
401mod tests {
402 use super::*;
403
404 #[test]
405 fn parses_empty_source() {
406 let doc = ParsedDoc::parse("<?php".to_string());
407 assert!(doc.errors.is_empty());
408 assert!(doc.program().stmts.is_empty());
409 }
410
411 #[test]
412 fn parses_function() {
413 let doc = ParsedDoc::parse("<?php\nfunction foo() {}".to_string());
414 assert_eq!(doc.program().stmts.len(), 1);
415 }
416
417 #[test]
418 fn offset_to_position_first_line() {
419 let src = "<?php\nfoo";
420 let doc = ParsedDoc::parse(src.to_string());
421 assert_eq!(
422 offset_to_position(src, doc.line_starts(), 0),
423 Position {
424 line: 0,
425 character: 0
426 }
427 );
428 }
429
430 #[test]
431 fn offset_to_position_second_line() {
432 let src = "<?php\nfoo";
434 let doc = ParsedDoc::parse(src.to_string());
435 assert_eq!(
436 offset_to_position(src, doc.line_starts(), 6),
437 Position {
438 line: 1,
439 character: 0
440 }
441 );
442 }
443
444 #[test]
445 fn offset_to_position_multibyte_utf16() {
446 let src = "a\u{1F600}b";
451 let doc = ParsedDoc::parse(src.to_string());
452 assert_eq!(
453 offset_to_position(src, doc.line_starts(), 5), Position {
455 line: 0,
456 character: 3
457 } );
459 }
460
461 #[test]
462 fn offset_to_position_crlf_start_of_line() {
463 let src = "foo\r\nbar";
466 let doc = ParsedDoc::parse(src.to_string());
467 assert_eq!(
468 offset_to_position(src, doc.line_starts(), 5), Position {
470 line: 1,
471 character: 0
472 }
473 );
474 }
475
476 #[test]
477 fn offset_to_position_crlf_does_not_count_cr_in_column() {
478 let src = "foo\r\nbar";
481 let doc = ParsedDoc::parse(src.to_string());
482 assert_eq!(
483 offset_to_position(src, doc.line_starts(), 3), Position {
485 line: 0,
486 character: 3
487 }
488 );
489 }
490
491 #[test]
492 fn offset_to_position_crlf_multiline() {
493 let src = "a\r\nb\r\nc";
496 let doc = ParsedDoc::parse(src.to_string());
497 assert_eq!(
498 offset_to_position(src, doc.line_starts(), 6), Position {
500 line: 2,
501 character: 0
502 }
503 );
504 assert_eq!(
505 offset_to_position(src, doc.line_starts(), 3), Position {
507 line: 1,
508 character: 0
509 }
510 );
511 }
512
513 #[test]
514 fn str_offset_finds_substr() {
515 let src = "<?php\nfunction foo() {}";
516 let name = &src[15..18]; assert_eq!(str_offset(src, name), Some(15));
518 }
519
520 #[test]
521 fn str_offset_content_fallback_for_different_allocation() {
522 let owned = "foo".to_string();
525 assert_eq!(str_offset("<?php foo", &owned), Some(6));
526 }
527
528 #[test]
529 fn str_offset_unrelated_content_returns_none() {
530 let owned = "bar".to_string();
531 assert_eq!(str_offset("<?php foo", &owned), None);
532 }
533}