1use std::mem::ManuallyDrop;
7use std::sync::{Arc, LazyLock, Mutex};
8
9use php_ast::{Program, Span, TypeHint, TypeHintKind};
10use tower_lsp::lsp_types::{Position, Range};
11
12const POOL_CAP: usize = 8;
15
16struct BumpPool {
17 #[allow(clippy::vec_box)]
21 pool: Mutex<Vec<Box<bumpalo::Bump>>>,
22}
23
24impl BumpPool {
25 fn take(&self) -> Box<bumpalo::Bump> {
26 self.pool
27 .lock()
28 .unwrap()
29 .pop()
30 .unwrap_or_else(|| Box::new(bumpalo::Bump::new()))
31 }
32
33 fn give(&self, mut arena: Box<bumpalo::Bump>) {
34 arena.reset();
35 let mut p = self.pool.lock().unwrap();
36 if p.len() < POOL_CAP {
37 p.push(arena);
38 }
39 }
40}
41
42static BUMP_POOL: LazyLock<BumpPool> = LazyLock::new(|| BumpPool {
43 pool: Mutex::new(Vec::new()),
44});
45
46struct ArenaGuard(Option<Box<bumpalo::Bump>>);
50
51impl Drop for ArenaGuard {
52 fn drop(&mut self) {
53 if let Some(arena) = self.0.take() {
54 BUMP_POOL.give(arena);
55 }
56 }
57}
58
59pub struct ParsedDoc {
76 program: ManuallyDrop<Box<Program<'static, 'static>>>,
77 pub errors: Vec<php_rs_parser::diagnostics::ParseError>,
78 _source: Arc<str>,
79 line_starts: Vec<u32>,
80 _arena: ArenaGuard,
81}
82
83impl Drop for ParsedDoc {
84 fn drop(&mut self) {
85 unsafe { ManuallyDrop::drop(&mut self.program) };
89 }
90}
91
92unsafe impl Send for ParsedDoc {}
94unsafe impl Sync for ParsedDoc {}
95
96impl ParsedDoc {
97 pub fn parse(source: impl Into<Arc<str>>) -> Self {
101 let source: Arc<str> = source.into();
102 let arena_box = BUMP_POOL.take();
104
105 let src_ref: &'static str = unsafe { std::mem::transmute::<&str, &'static str>(&*source) };
111 let arena_ref: &'static bumpalo::Bump = unsafe {
112 std::mem::transmute::<&bumpalo::Bump, &'static bumpalo::Bump>(arena_box.as_ref())
113 };
114
115 let result = php_rs_parser::parse_arena(arena_ref, src_ref);
116
117 let line_starts = build_line_starts(src_ref);
118
119 ParsedDoc {
120 program: ManuallyDrop::new(Box::new(result.program)),
121 errors: result.errors,
122 _source: source,
123 line_starts,
124 _arena: ArenaGuard(Some(arena_box)),
125 }
126 }
127
128 #[inline]
133 pub fn program(&self) -> &Program<'_, '_> {
134 &self.program
135 }
136
137 #[inline]
139 pub fn source(&self) -> &str {
140 &self._source
141 }
142
143 #[inline]
149 pub fn source_arc(&self) -> Arc<str> {
150 self._source.clone()
151 }
152
153 pub fn line_starts(&self) -> &[u32] {
156 &self.line_starts
157 }
158
159 pub fn view(&self) -> SourceView<'_> {
161 SourceView {
162 source: self.source(),
163 line_starts: self.line_starts(),
164 }
165 }
166}
167
168impl Default for ParsedDoc {
169 fn default() -> Self {
170 ParsedDoc::parse("")
171 }
172}
173
174fn build_line_starts(source: &str) -> Vec<u32> {
179 let mut starts = vec![0u32];
180 for (i, b) in source.bytes().enumerate() {
181 if b == b'\n' {
182 starts.push(i as u32 + 1);
183 }
184 }
185 starts
186}
187
188#[derive(Copy, Clone)]
191pub struct SourceView<'a> {
192 source: &'a str,
193 line_starts: &'a [u32],
194}
195
196impl<'a> SourceView<'a> {
197 #[inline]
198 pub fn source(self) -> &'a str {
199 self.source
200 }
201
202 pub fn position_of(self, offset: u32) -> Position {
203 offset_to_position(self.source, self.line_starts, offset)
204 }
205
206 #[inline]
207 pub fn line_starts(self) -> &'a [u32] {
208 self.line_starts
209 }
210
211 #[inline]
214 pub fn line_of(self, offset: u32) -> u32 {
215 match self.line_starts.partition_point(|&s| s <= offset) {
216 0 => 0,
217 i => (i - 1) as u32,
218 }
219 }
220
221 pub fn byte_of_position(self, pos: Position) -> u32 {
226 let line_idx = pos.line as usize;
227 let line_start = self.line_starts.get(line_idx).copied().unwrap_or(0) as usize;
228 let line_end = self
229 .line_starts
230 .get(line_idx + 1)
231 .map(|&s| (s as usize).saturating_sub(1))
232 .unwrap_or(self.source.len());
233 let raw = &self.source[line_start..line_end.min(self.source.len())];
234 let line = raw.strip_suffix('\r').unwrap_or(raw);
235 let mut col_utf16: u32 = 0;
236 let mut byte_in_line: usize = 0;
237 for ch in line.chars() {
238 if col_utf16 >= pos.character {
239 break;
240 }
241 col_utf16 += ch.len_utf16() as u32;
242 byte_in_line += ch.len_utf8();
243 }
244 (line_start + byte_in_line) as u32
245 }
246
247 pub fn range_of(self, span: Span) -> Range {
248 Range {
249 start: self.position_of(span.start),
250 end: self.position_of(span.end),
251 }
252 }
253
254 pub fn name_range(self, name: &str) -> Range {
255 let start = str_offset(self.source, name).unwrap_or(0);
256 Range {
257 start: self.position_of(start),
258 end: self.position_of(start + name.len() as u32),
259 }
260 }
261
262 pub fn name_range_in_span(self, name: &str, span: php_ast::Span) -> Range {
268 let s = span.start as usize;
269 let e = (span.end as usize).min(self.source.len());
270 let start = self
271 .source
272 .get(s..e)
273 .and_then(|slice| slice.find(name))
274 .map(|off| span.start + off as u32)
275 .unwrap_or_else(|| str_offset(self.source, name).unwrap_or(0));
276 Range {
277 start: self.position_of(start),
278 end: self.position_of(start + name.len() as u32),
279 }
280 }
281}
282
283pub fn offset_to_position(source: &str, line_starts: &[u32], offset: u32) -> Position {
290 let offset_usize = (offset as usize).min(source.len());
291 let line = match line_starts.partition_point(|&s| s <= offset) {
293 0 => 0u32,
294 i => (i - 1) as u32,
295 };
296 let line_start = line_starts.get(line as usize).copied().unwrap_or(0) as usize;
297 let segment = &source[line_start..offset_usize];
298 let segment = segment.strip_suffix('\r').unwrap_or(segment);
300 let character = segment.chars().map(|c| c.len_utf16() as u32).sum::<u32>();
301 Position { line, character }
302}
303
304pub fn span_to_range(source: &str, line_starts: &[u32], span: Span) -> Range {
306 Range {
307 start: offset_to_position(source, line_starts, span.start),
308 end: offset_to_position(source, line_starts, span.end),
309 }
310}
311
312pub fn str_offset(source: &str, substr: &str) -> Option<u32> {
323 let src_ptr = source.as_ptr() as usize;
324 let sub_ptr = substr.as_ptr() as usize;
325 if sub_ptr >= src_ptr && sub_ptr + substr.len() <= src_ptr + source.len() {
326 return Some((sub_ptr - src_ptr) as u32);
327 }
328 let mut search_pos = 0;
332 while let Some(offset) = source[search_pos..].find(substr) {
333 let abs_offset = search_pos + offset;
334 let is_start_boundary = abs_offset == 0
335 || !source[..abs_offset]
336 .chars()
337 .last()
338 .map(|c| c.is_alphanumeric() || c == '_')
339 .unwrap_or(false);
340 let end_pos = abs_offset + substr.len();
341 let is_end_boundary = end_pos >= source.len()
342 || !source[end_pos..]
343 .chars()
344 .next()
345 .map(|c| c.is_alphanumeric() || c == '_')
346 .unwrap_or(false);
347
348 if is_start_boundary && is_end_boundary {
349 return Some(abs_offset as u32);
350 }
351
352 search_pos = abs_offset + 1;
353 }
354 None
355}
356
357pub fn name_range(source: &str, line_starts: &[u32], name: &str) -> Option<Range> {
359 let start = str_offset(source, name)?;
360 Some(Range {
361 start: offset_to_position(source, line_starts, start),
362 end: offset_to_position(source, line_starts, start + name.len() as u32),
363 })
364}
365
366pub fn str_offset_in_range(source: &str, span: Span, name: &str) -> Option<u32> {
369 let span_start = span.start as usize;
370 let span_end = span.end as usize;
371 if span_end > source.len() {
372 return None;
373 }
374 let span_text = &source[span_start..span_end];
375 let offset = str_offset(span_text, name)?;
376 Some(span_start as u32 + offset)
377}
378
379pub fn format_type_hint(hint: &TypeHint<'_, '_>) -> String {
383 fmt_kind(&hint.kind)
384}
385
386fn fmt_kind(kind: &TypeHintKind<'_, '_>) -> String {
387 match kind {
388 TypeHintKind::Named(name) => name.to_string_repr().to_string(),
389 TypeHintKind::Keyword(builtin, _) => builtin.as_str().to_string(),
390 TypeHintKind::Nullable(inner) => format!("?{}", format_type_hint(inner)),
391 TypeHintKind::Union(types) => types
392 .iter()
393 .map(format_type_hint)
394 .collect::<Vec<_>>()
395 .join("|"),
396 TypeHintKind::Intersection(types) => types
397 .iter()
398 .map(format_type_hint)
399 .collect::<Vec<_>>()
400 .join("&"),
401 }
402}
403
404#[cfg(test)]
405mod tests {
406 use super::*;
407
408 #[test]
409 fn parses_empty_source() {
410 let doc = ParsedDoc::parse("<?php".to_string());
411 assert!(doc.errors.is_empty());
412 assert!(doc.program().stmts.is_empty());
413 }
414
415 #[test]
416 fn parses_function() {
417 let doc = ParsedDoc::parse("<?php\nfunction foo() {}".to_string());
418 assert_eq!(doc.program().stmts.len(), 1);
419 }
420
421 #[test]
422 fn offset_to_position_first_line() {
423 let src = "<?php\nfoo";
424 let doc = ParsedDoc::parse(src.to_string());
425 assert_eq!(
426 offset_to_position(src, doc.line_starts(), 0),
427 Position {
428 line: 0,
429 character: 0
430 }
431 );
432 }
433
434 #[test]
435 fn offset_to_position_second_line() {
436 let src = "<?php\nfoo";
438 let doc = ParsedDoc::parse(src.to_string());
439 assert_eq!(
440 offset_to_position(src, doc.line_starts(), 6),
441 Position {
442 line: 1,
443 character: 0
444 }
445 );
446 }
447
448 #[test]
449 fn offset_to_position_multibyte_utf16() {
450 let src = "a\u{1F600}b";
455 let doc = ParsedDoc::parse(src.to_string());
456 assert_eq!(
457 offset_to_position(src, doc.line_starts(), 5), Position {
459 line: 0,
460 character: 3
461 } );
463 }
464
465 #[test]
466 fn offset_to_position_crlf_start_of_line() {
467 let src = "foo\r\nbar";
470 let doc = ParsedDoc::parse(src.to_string());
471 assert_eq!(
472 offset_to_position(src, doc.line_starts(), 5), Position {
474 line: 1,
475 character: 0
476 }
477 );
478 }
479
480 #[test]
481 fn offset_to_position_crlf_does_not_count_cr_in_column() {
482 let src = "foo\r\nbar";
485 let doc = ParsedDoc::parse(src.to_string());
486 assert_eq!(
487 offset_to_position(src, doc.line_starts(), 3), Position {
489 line: 0,
490 character: 3
491 }
492 );
493 }
494
495 #[test]
496 fn offset_to_position_crlf_multiline() {
497 let src = "a\r\nb\r\nc";
500 let doc = ParsedDoc::parse(src.to_string());
501 assert_eq!(
502 offset_to_position(src, doc.line_starts(), 6), Position {
504 line: 2,
505 character: 0
506 }
507 );
508 assert_eq!(
509 offset_to_position(src, doc.line_starts(), 3), Position {
511 line: 1,
512 character: 0
513 }
514 );
515 }
516
517 #[test]
518 fn str_offset_finds_substr() {
519 let src = "<?php\nfunction foo() {}";
520 let name = &src[15..18]; assert_eq!(str_offset(src, name), Some(15));
522 }
523
524 #[test]
525 fn str_offset_content_fallback_for_different_allocation() {
526 let owned = "foo".to_string();
529 assert_eq!(str_offset("<?php foo", &owned), Some(6));
530 }
531
532 #[test]
533 fn str_offset_unrelated_content_returns_none() {
534 let owned = "bar".to_string();
535 assert_eq!(str_offset("<?php foo", &owned), None);
536 }
537}