Skip to main content

perl_semantic_analyzer/analysis/semantic/
builtins.rs

1//! Perl built-in function documentation and classification.
2
/// Documentation entry for a Perl built-in function or operator.
///
/// Carries the signature and description text shown in hover tooltips.
/// Both fields borrow `'static` string data, so the struct is trivially
/// copyable and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BuiltinDoc {
    /// Calling conventions; alternative forms are separated by `\n`.
    pub signature: &'static str,
    /// Brief description of what the function or operator does.
    pub description: &'static str,
}
12
/// Strip a single leading `CORE::` qualifier from a built-in name.
///
/// Perl permits calling built-ins through the `CORE::` namespace (for
/// example `CORE::length`). Callers pass the function-call text unchanged,
/// so lookups go through this helper to treat the qualified and bare
/// spellings identically. Names without the prefix are returned untouched.
fn normalized_builtin_name(name: &str) -> &str {
    match name.strip_prefix("CORE::") {
        Some(bare) => bare,
        None => name,
    }
}
21
22/// Check if a function name is a Perl control-flow keyword.
23///
24/// Returns `true` if the name is a control-flow keyword like `next`, `last`, etc.
25pub(super) fn is_control_keyword(name: &str) -> bool {
26    let name = normalized_builtin_name(name);
27    matches!(name, "next" | "last" | "redo" | "goto" | "return" | "exit" | "die")
28}
29
30/// Check if a function name is a Perl built-in.
31///
32/// Returns `true` if the name matches a known Perl built-in function.
33pub(super) fn is_builtin_function(name: &str) -> bool {
34    let name = normalized_builtin_name(name);
35    matches!(
36        name,
37        "print"
38            | "say"
39            | "printf"
40            | "sprintf"
41            | "open"
42            | "close"
43            | "read"
44            | "write"
45            | "chomp"
46            | "chop"
47            | "split"
48            | "join"
49            | "push"
50            | "pop"
51            | "shift"
52            | "unshift"
53            | "sort"
54            | "reverse"
55            | "map"
56            | "grep"
57            | "length"
58            | "substr"
59            | "index"
60            | "rindex"
61            | "lc"
62            | "uc"
63            | "lcfirst"
64            | "ucfirst"
65            | "defined"
66            | "undef"
67            | "ref"
68            | "blessed"
69            | "die"
70            | "warn"
71            | "eval"
72            | "require"
73            | "use"
74            | "return"
75            | "next"
76            | "last"
77            | "redo"
78            | "goto" // ... many more
79    )
80}
81
82/// Check if an operator is a file test operator.
83///
84/// File test operators in Perl are unary operators that test file properties:
85/// -e (exists), -d (directory), -f (file), -r (readable), -w (writable), etc.
86pub(super) fn is_file_test_operator(op: &str) -> bool {
87    matches!(
88        op,
89        "-e" | "-d"
90            | "-f"
91            | "-r"
92            | "-w"
93            | "-x"
94            | "-s"
95            | "-z"
96            | "-T"
97            | "-B"
98            | "-M"
99            | "-A"
100            | "-C"
101            | "-l"
102            | "-p"
103            | "-S"
104            | "-u"
105            | "-g"
106            | "-k"
107            | "-t"
108            | "-O"
109            | "-G"
110            | "-R"
111            | "-b"
112            | "-c"
113    )
114}
115
116/// Get documentation for a Perl file test operator.
117///
118/// Returns signature and description for known file test operators,
119/// or `None` if documentation is not available.
120pub fn get_operator_documentation(op: &str) -> Option<BuiltinDoc> {
121    macro_rules! doc {
122        ($signature:expr, $description:expr) => {
123            Some(BuiltinDoc { signature: $signature, description: $description })
124        };
125    }
126
127    match op {
128        "-e" => doc!("-e FILE\n-e", "Returns true if FILE exists. If FILE is omitted, tests `$_`."),
129        "-f" => doc!(
130            "-f FILE\n-f",
131            "Returns true if FILE is a plain file. If FILE is omitted, tests `$_`."
132        ),
133        "-d" => doc!(
134            "-d FILE\n-d",
135            "Returns true if FILE is a directory. If FILE is omitted, tests `$_`."
136        ),
137        "-r" => doc!(
138            "-r FILE\n-r",
139            "Returns true if FILE is readable by the effective user or group ID. If FILE is omitted, tests `$_`."
140        ),
141        "-w" => doc!(
142            "-w FILE\n-w",
143            "Returns true if FILE is writable by the effective user or group ID. If FILE is omitted, tests `$_`."
144        ),
145        "-x" => doc!(
146            "-x FILE\n-x",
147            "Returns true if FILE is executable by the effective user or group ID. If FILE is omitted, tests `$_`."
148        ),
149        "-o" => doc!(
150            "-o FILE\n-o",
151            "Returns true if FILE is owned by the effective user ID. If FILE is omitted, tests `$_`."
152        ),
153        "-R" => doc!(
154            "-R FILE\n-R",
155            "Returns true if FILE is readable by the real user or group ID. If FILE is omitted, tests `$_`."
156        ),
157        "-W" => doc!(
158            "-W FILE\n-W",
159            "Returns true if FILE is writable by the real user or group ID. If FILE is omitted, tests `$_`."
160        ),
161        "-X" => doc!(
162            "-X FILE\n-X",
163            "Returns true if FILE is executable by the real user or group ID. If FILE is omitted, tests `$_`."
164        ),
165        "-O" => doc!(
166            "-O FILE\n-O",
167            "Returns true if FILE is owned by the real user ID. If FILE is omitted, tests `$_`."
168        ),
169        "-z" => doc!(
170            "-z FILE\n-z",
171            "Returns true if FILE exists and has zero size. If FILE is omitted, tests `$_`."
172        ),
173        "-s" => doc!(
174            "-s FILE\n-s",
175            "Returns the file size in bytes in scalar context, or true if FILE has nonzero size. If FILE is omitted, tests `$_`."
176        ),
177        "-l" => doc!(
178            "-l FILE\n-l",
179            "Returns true if FILE is a symbolic link. If FILE is omitted, tests `$_`."
180        ),
181        "-p" => doc!(
182            "-p FILE\n-p",
183            "Returns true if FILE is a named pipe (FIFO). If FILE is omitted, tests `$_`."
184        ),
185        "-S" => {
186            doc!("-S FILE\n-S", "Returns true if FILE is a socket. If FILE is omitted, tests `$_`.")
187        }
188        "-u" => doc!(
189            "-u FILE\n-u",
190            "Returns true if FILE has the setuid bit set. If FILE is omitted, tests `$_`."
191        ),
192        "-g" => doc!(
193            "-g FILE\n-g",
194            "Returns true if FILE has the setgid bit set. If FILE is omitted, tests `$_`."
195        ),
196        "-k" => doc!(
197            "-k FILE\n-k",
198            "Returns true if FILE has the sticky bit set. If FILE is omitted, tests `$_`."
199        ),
200        "-t" => doc!(
201            "-t FILEHANDLE\n-t",
202            "Returns true if FILEHANDLE is connected to a tty. If FILEHANDLE is omitted, tests `STDIN`."
203        ),
204        "-T" => doc!(
205            "-T FILE\n-T",
206            "Returns true if FILE looks like a text file. If FILE is omitted, tests `$_`."
207        ),
208        "-B" => doc!(
209            "-B FILE\n-B",
210            "Returns true if FILE looks like a binary file. If FILE is omitted, tests `$_`."
211        ),
212        "-M" => doc!(
213            "-M FILE\n-M",
214            "Returns the file age in days at program start, based on the file's modification time."
215        ),
216        "-A" => doc!("-A FILE\n-A", "Returns the file age in days based on the last access time."),
217        "-C" => {
218            doc!("-C FILE\n-C", "Returns the file age in days based on the last inode change time.")
219        }
220        "-b" => doc!(
221            "-b FILE\n-b",
222            "Returns true if FILE is a block special file. If FILE is omitted, tests `$_`."
223        ),
224        "-c" => doc!(
225            "-c FILE\n-c",
226            "Returns true if FILE is a character special file. If FILE is omitted, tests `$_`."
227        ),
228        _ => None,
229    }
230}
231
232/// Get documentation for a Perl built-in function.
233///
234/// Returns signature and description for known built-in functions,
235/// or `None` if documentation is not available.
236///
237/// This is also used by the LSP hover handler to show builtin docs when the
238/// semantic analyzer has no symbol-level hit (e.g. bare-word builtins in
239/// fallback path).
240pub fn get_builtin_documentation(name: &str) -> Option<BuiltinDoc> {
241    let name = normalized_builtin_name(name);
242    match name {
243        // I/O
244        "print" => Some(BuiltinDoc {
245            signature: "print FILEHANDLE LIST\nprint LIST\nprint",
246            description: "Prints a string or list of strings. If FILEHANDLE is omitted, prints to the last selected output handle (STDOUT by default).",
247        }),
248        "say" => Some(BuiltinDoc {
249            signature: "say FILEHANDLE LIST\nsay LIST\nsay",
250            description: "Like print, but appends a newline to the output.",
251        }),
252        "printf" => Some(BuiltinDoc {
253            signature: "printf FILEHANDLE FORMAT, LIST\nprintf FORMAT, LIST",
254            description: "Prints a formatted string to FILEHANDLE (default STDOUT).",
255        }),
256        "sprintf" => Some(BuiltinDoc {
257            signature: "sprintf FORMAT, LIST",
258            description: "Returns a formatted string (like C sprintf). Does not print.",
259        }),
260        "open" => Some(BuiltinDoc {
261            signature: "open FILEHANDLE, MODE, EXPR\nopen FILEHANDLE, EXPR\nopen FILEHANDLE",
262            description: "Opens the file whose filename is given by EXPR, and associates it with FILEHANDLE.",
263        }),
264        "close" => Some(BuiltinDoc {
265            signature: "close FILEHANDLE\nclose",
266            description: "Closes the file, socket, or pipe associated with FILEHANDLE.",
267        }),
268        "read" => Some(BuiltinDoc {
269            signature: "read FILEHANDLE, SCALAR, LENGTH, OFFSET\nread FILEHANDLE, SCALAR, LENGTH",
270            description: "Reads LENGTH bytes of data into SCALAR from FILEHANDLE. Returns the number of bytes read, or undef on error.",
271        }),
272        "write" => Some(BuiltinDoc {
273            signature: "write FILEHANDLE\nwrite",
274            description: "Writes a formatted record to FILEHANDLE using the format associated with it.",
275        }),
276        "seek" => Some(BuiltinDoc {
277            signature: "seek FILEHANDLE, POSITION, WHENCE",
278            description: "Sets the position for a filehandle. WHENCE: 0=start, 1=current, 2=end.",
279        }),
280        "tell" => Some(BuiltinDoc {
281            signature: "tell FILEHANDLE\ntell",
282            description: "Returns the current position in bytes for FILEHANDLE.",
283        }),
284        "eof" => Some(BuiltinDoc {
285            signature: "eof FILEHANDLE\neof()\neof",
286            description: "Returns true if the next read on FILEHANDLE would return end of file.",
287        }),
288        "binmode" => Some(BuiltinDoc {
289            signature: "binmode FILEHANDLE, LAYER\nbinmode FILEHANDLE",
290            description: "Sets binary mode on FILEHANDLE, or specifies an I/O layer.",
291        }),
292        "truncate" => Some(BuiltinDoc {
293            signature: "truncate FILEHANDLE, LENGTH",
294            description: "Truncates the file at the given LENGTH.",
295        }),
296
297        // String functions
298        "chomp" => Some(BuiltinDoc {
299            signature: "chomp VARIABLE\nchomp LIST\nchomp",
300            description: "Removes the trailing newline from VARIABLE. Returns the number of characters removed.",
301        }),
302        "chop" => Some(BuiltinDoc {
303            signature: "chop VARIABLE\nchop LIST\nchop",
304            description: "Removes and returns the last character from VARIABLE.",
305        }),
306        "length" => Some(BuiltinDoc {
307            signature: "length EXPR\nlength",
308            description: "Returns the length in characters of the value of EXPR.",
309        }),
310        "substr" => Some(BuiltinDoc {
311            signature: "substr EXPR, OFFSET, LENGTH, REPLACEMENT\nsubstr EXPR, OFFSET, LENGTH\nsubstr EXPR, OFFSET",
312            description: "Extracts a substring out of EXPR and returns it. With REPLACEMENT, replaces the substring in-place.",
313        }),
314        "index" => Some(BuiltinDoc {
315            signature: "index STR, SUBSTR, POSITION\nindex STR, SUBSTR",
316            description: "Returns the position of the first occurrence of SUBSTR in STR at or after POSITION. Returns -1 if not found.",
317        }),
318        "rindex" => Some(BuiltinDoc {
319            signature: "rindex STR, SUBSTR, POSITION\nrindex STR, SUBSTR",
320            description: "Returns the position of the last occurrence of SUBSTR in STR at or before POSITION.",
321        }),
322        "lc" => Some(BuiltinDoc {
323            signature: "lc EXPR\nlc",
324            description: "Returns a lowercased version of EXPR (or $_ if omitted).",
325        }),
326        "uc" => Some(BuiltinDoc {
327            signature: "uc EXPR\nuc",
328            description: "Returns an uppercased version of EXPR (or $_ if omitted).",
329        }),
330        "lcfirst" => Some(BuiltinDoc {
331            signature: "lcfirst EXPR\nlcfirst",
332            description: "Returns EXPR with the first character lowercased.",
333        }),
334        "ucfirst" => Some(BuiltinDoc {
335            signature: "ucfirst EXPR\nucfirst",
336            description: "Returns EXPR with the first character uppercased.",
337        }),
338        "chr" => Some(BuiltinDoc {
339            signature: "chr NUMBER\nchr",
340            description: "Returns the character represented by NUMBER in the character set.",
341        }),
342        "ord" => Some(BuiltinDoc {
343            signature: "ord EXPR\nord",
344            description: "Returns the numeric value of the first character of EXPR.",
345        }),
346        "hex" => Some(BuiltinDoc {
347            signature: "hex EXPR\nhex",
348            description: "Interprets EXPR as a hex string and returns the corresponding numeric value.",
349        }),
350        "oct" => Some(BuiltinDoc {
351            signature: "oct EXPR\noct",
352            description: "Interprets EXPR as an octal string and returns the corresponding value. Handles 0x, 0b, and 0 prefixes.",
353        }),
354        "quotemeta" => Some(BuiltinDoc {
355            signature: "quotemeta EXPR\nquotemeta",
356            description: "Returns EXPR with all non-alphanumeric characters backslashed (escaped for regex).",
357        }),
358        "join" => Some(BuiltinDoc {
359            signature: "join EXPR, LIST",
360            description: "Joins the separate strings of LIST into a single string with fields separated by EXPR, and returns that string.\n\n```perl\nmy $str = join(', ', 'a', 'b', 'c');  # \"a, b, c\"\nmy $csv = join(',', @fields);\n```",
361        }),
362        "split" => Some(BuiltinDoc {
363            signature: "split /PATTERN/, EXPR, LIMIT\nsplit /PATTERN/, EXPR\nsplit /PATTERN/\nsplit",
364            description: "Splits the string EXPR into a list of strings and returns the list. If LIMIT is specified, splits into at most that many fields.\n\n```perl\nmy @words = split /\\s+/, $line;       # split on whitespace\nmy @fields = split /,/, $csv, 10;    # at most 10 fields\n```",
365        }),
366
367        // Array/List
368        "push" => Some(BuiltinDoc {
369            signature: "push ARRAY, LIST",
370            description: "Appends one or more values to the end of ARRAY. Returns the number of elements in the resulting array.\n\n```perl\nmy @list = (1, 2);\npush @list, 3, 4;   # @list is now (1, 2, 3, 4)\n```",
371        }),
372        "pop" => Some(BuiltinDoc {
373            signature: "pop ARRAY\npop",
374            description: "Removes and returns the last element of ARRAY.\n\n```perl\nmy @stack = (1, 2, 3);\nmy $top = pop @stack;   # $top = 3, @stack = (1, 2)\n```",
375        }),
376        "shift" => Some(BuiltinDoc {
377            signature: "shift ARRAY\nshift",
378            description: "Removes and returns the first element of ARRAY, shortening the array by 1.\n\n```perl\nmy @queue = ('first', 'second');\nmy $item = shift @queue;   # $item = 'first'\n```",
379        }),
380        "unshift" => Some(BuiltinDoc {
381            signature: "unshift ARRAY, LIST",
382            description: "Prepends LIST to the front of ARRAY. Returns the number of elements in the new array.\n\n```perl\nmy @list = (3, 4);\nunshift @list, 1, 2;   # @list is now (1, 2, 3, 4)\n```",
383        }),
384        "splice" => Some(BuiltinDoc {
385            signature: "splice ARRAY, OFFSET, LENGTH, LIST\nsplice ARRAY, OFFSET, LENGTH\nsplice ARRAY, OFFSET\nsplice ARRAY",
386            description: "Removes LENGTH elements from ARRAY starting at OFFSET, replacing them with LIST. Returns the removed elements. In scalar context, returns the last removed element.",
387        }),
388        "sort" => Some(BuiltinDoc {
389            signature: "sort SUBNAME LIST\nsort BLOCK LIST\nsort LIST",
390            description: "Sorts LIST and returns the sorted list. BLOCK or SUBNAME provides a custom comparison function using $a and $b. Only valid in list context; using sort in scalar context returns undef (avoid).",
391        }),
392        "reverse" => Some(BuiltinDoc {
393            signature: "reverse LIST",
394            description: "In list context, returns LIST in reverse order. In scalar context, returns a string with characters reversed.",
395        }),
396        "map" => Some(BuiltinDoc {
397            signature: "map BLOCK LIST\nmap EXPR, LIST",
398            description: "Evaluates the BLOCK or EXPR for each element of LIST (locally setting $_ to each element) and composes a list of the results. In scalar context, returns the number of elements the expression would produce.\n\n```perl\nmy @doubled = map { $_ * 2 } @numbers;\nmy @names   = map { $_->{name} } @records;\n```",
399        }),
400        "grep" => Some(BuiltinDoc {
401            signature: "grep BLOCK LIST\ngrep EXPR, LIST",
402            description: "Evaluates BLOCK or EXPR for each element of LIST and returns the list of elements for which the expression is true. In scalar context, returns the number of matching elements rather than the list.\n\n```perl\nmy @evens = grep { $_ % 2 == 0 } @numbers;\nmy $count = grep { /pattern/ } @lines;\n```",
403        }),
404        "scalar" => Some(BuiltinDoc {
405            signature: "scalar EXPR",
406            description: "Forces EXPR to be interpreted in scalar context and returns the value of EXPR.",
407        }),
408        "wantarray" => Some(BuiltinDoc {
409            signature: "wantarray",
410            description: "Returns true if the subroutine is called in list context, false (defined but false) in scalar context, and undef in void context. Use to write context-sensitive subs: `return wantarray ? @list : $count;`",
411        }),
412
413        // Hash
414        "keys" => Some(BuiltinDoc {
415            signature: "keys HASH\nkeys ARRAY",
416            description: "In list context, returns all keys of the named hash or indices of an array. In scalar context, returns the number of keys (an integer count). Note: `scalar keys %h` is the idiomatic way to count hash entries.",
417        }),
418        "values" => Some(BuiltinDoc {
419            signature: "values HASH\nvalues ARRAY",
420            description: "In list context, returns all values of the named hash or values of an array. In scalar context, returns the number of values (same as scalar keys).",
421        }),
422        "each" => Some(BuiltinDoc {
423            signature: "each HASH\neach ARRAY",
424            description: "Returns the next key-value pair from the hash as a two-element list, or an empty list when exhausted. The iterator resets when the list is exhausted, when keys() or values() is called on the hash, or when the hash is modified. Call in a while loop: `while (my ($k, $v) = each %h) { ... }`",
425        }),
426        "exists" => Some(BuiltinDoc {
427            signature: "exists EXPR",
428            description: "Returns true if the specified hash key or array element exists, even if its value is undef.",
429        }),
430        "delete" => Some(BuiltinDoc {
431            signature: "delete EXPR",
432            description: "Deletes the specified keys and their associated values from a hash, or elements from an array.",
433        }),
434        "defined" => Some(BuiltinDoc {
435            signature: "defined EXPR\ndefined",
436            description: "Returns true if EXPR has a value other than undef.",
437        }),
438        "undef" => Some(BuiltinDoc {
439            signature: "undef EXPR\nundef",
440            description: "Undefines the value of EXPR. Can be used on scalars, arrays, hashes, subroutines, and typeglobs.",
441        }),
442
443        // References and OO
444        "ref" => Some(BuiltinDoc {
445            signature: "ref EXPR\nref",
446            description: "Returns a string indicating the type of reference EXPR is, or empty string if not a reference. E.g. HASH, ARRAY, SCALAR, CODE.",
447        }),
448        "bless" => Some(BuiltinDoc {
449            signature: "bless REF, CLASSNAME\nbless REF",
450            description: "Associates the referent of REF with package CLASSNAME (or current package). Returns the reference.",
451        }),
452        "blessed" => Some(BuiltinDoc {
453            signature: "blessed EXPR",
454            description: "Returns the name of the package EXPR is blessed into, or undef if EXPR is not a blessed reference. From Scalar::Util.",
455        }),
456        "tie" => Some(BuiltinDoc {
457            signature: "tie VARIABLE, CLASSNAME, LIST",
458            description: "Binds a variable to a package class that provides the implementation for the variable.",
459        }),
460        "untie" => Some(BuiltinDoc {
461            signature: "untie VARIABLE",
462            description: "Breaks the binding between a variable and its package.",
463        }),
464        "tied" => Some(BuiltinDoc {
465            signature: "tied VARIABLE",
466            description: "Returns a reference to the object underlying VARIABLE if it is tied, or undef if not.",
467        }),
468
469        // Tie magic methods
470        "TIESCALAR" => Some(BuiltinDoc {
471            signature: "TIESCALAR CLASSNAME, LIST",
472            description: "Constructor called when `tie $scalar, CLASSNAME, LIST` is used. Must return a blessed reference.",
473        }),
474        "TIEARRAY" => Some(BuiltinDoc {
475            signature: "TIEARRAY CLASSNAME, LIST",
476            description: "Constructor called when `tie @array, CLASSNAME, LIST` is used. Must return a blessed reference.",
477        }),
478        "TIEHASH" => Some(BuiltinDoc {
479            signature: "TIEHASH CLASSNAME, LIST",
480            description: "Constructor called when `tie %hash, CLASSNAME, LIST` is used. Must return a blessed reference.",
481        }),
482        "TIEHANDLE" => Some(BuiltinDoc {
483            signature: "TIEHANDLE CLASSNAME, LIST",
484            description: "Constructor called when `tie *FH, CLASSNAME, LIST` is used. Must return a blessed reference.",
485        }),
486        "FETCH" => Some(BuiltinDoc {
487            signature: "FETCH this",
488            description: "Called on every access of a tied scalar or array/hash element. Returns the value.",
489        }),
490        "STORE" => Some(BuiltinDoc {
491            signature: "STORE this, value",
492            description: "Called on every assignment to a tied scalar or array/hash element.",
493        }),
494        "FIRSTKEY" => Some(BuiltinDoc {
495            signature: "FIRSTKEY this",
496            description: "Called when `keys` or `each` is first invoked on a tied hash.",
497        }),
498        "NEXTKEY" => Some(BuiltinDoc {
499            signature: "NEXTKEY this, lastkey",
500            description: "Called during iteration of a tied hash with `each` or `keys`.",
501        }),
502        "DESTROY" => Some(BuiltinDoc {
503            signature: "DESTROY this",
504            description: "Called when the tied object goes out of scope or is explicitly untied.",
505        }),
506
507        // Control flow
508        "die" => Some(BuiltinDoc {
509            signature: "die LIST",
510            description: "Raises an exception. If LIST does not end in '\\n', Perl appends the script name and line number. In modules, prefer Carp::croak() to preserve the caller's stack frame. The exception is available in $@ after an eval block.",
511        }),
512        "warn" => Some(BuiltinDoc {
513            signature: "warn LIST",
514            description: "Prints a warning to STDERR. Does not exit. If the message does not end in '\\n', Perl appends the script name and line number. In modules, prefer Carp::carp() to report from the caller's perspective.",
515        }),
516        "eval" => Some(BuiltinDoc {
517            signature: "eval BLOCK\neval EXPR",
518            description: "Evaluates BLOCK or EXPR and traps exceptions. After the eval, check $@ for errors: if ($@) { ... }. BLOCK form is preferred — EXPR form (string eval) is a security risk and triggers the PL600 diagnostic.",
519        }),
520        // Carp module functions
521        "croak" => Some(BuiltinDoc {
522            signature: "croak LIST",
523            description: "Like die but reports the error from the caller's perspective. Part of the Carp module. Use instead of die in library code so the stack trace points to the caller, not the module internals.",
524        }),
525        "carp" => Some(BuiltinDoc {
526            signature: "carp LIST",
527            description: "Like warn but reports the warning from the caller's perspective. Part of the Carp module. Prefer over warn in library code.",
528        }),
529        "confess" => Some(BuiltinDoc {
530            signature: "confess LIST",
531            description: "Like croak but includes a full stack trace. Part of the Carp module. Use when the full call chain is needed for debugging.",
532        }),
533        "cluck" => Some(BuiltinDoc {
534            signature: "cluck LIST",
535            description: "Like carp but includes a full stack trace. Part of the Carp module. Use for warnings that benefit from call chain context.",
536        }),
537        "return" => Some(BuiltinDoc {
538            signature: "return EXPR\nreturn",
539            description: "Returns from a subroutine with the value of EXPR.",
540        }),
541        "next" => Some(BuiltinDoc {
542            signature: "next LABEL\nnext",
543            description: "Starts the next iteration of the loop (like C 'continue').",
544        }),
545        "last" => Some(BuiltinDoc {
546            signature: "last LABEL\nlast",
547            description: "Exits the loop immediately (like C 'break').",
548        }),
549        "redo" => Some(BuiltinDoc {
550            signature: "redo LABEL\nredo",
551            description: "Restarts the loop block without re-evaluating the condition.",
552        }),
553        "goto" => Some(BuiltinDoc {
554            signature: "goto LABEL\ngoto EXPR\ngoto &NAME",
555            description: "Transfers control to the named label, computed label, or substitutes a call to the named subroutine.",
556        }),
557        "caller" => Some(BuiltinDoc {
558            signature: "caller EXPR\ncaller",
559            description: "Without argument, returns (package, filename, line) in list context or the package name in scalar context. With EXPR returns additional call-frame info: (package, filename, line, subroutine, hasargs, wantarray, evaltext, is_require, hints, bitmask, hinthash).",
560        }),
561        "exit" => Some(BuiltinDoc {
562            signature: "exit EXPR\nexit",
563            description: "Exits the program with status EXPR (default 0). Calls END blocks and DESTROY methods before exit.",
564        }),
565
566        // Modules and loading
567        "require" => Some(BuiltinDoc {
568            signature: "require EXPR\nrequire",
569            description: "Loads a library module at runtime. Raises an exception on failure.",
570        }),
571        "use" => Some(BuiltinDoc {
572            signature: "use Module VERSION LIST\nuse Module VERSION\nuse Module LIST\nuse Module",
573            description: "Loads and imports a module at compile time. Equivalent to BEGIN { require Module; Module->import( LIST ); }",
574        }),
575        "do" => Some(BuiltinDoc {
576            signature: "do BLOCK\ndo EXPR",
577            description: "As do BLOCK: executes BLOCK and returns its value. As do EXPR: reads and executes a Perl file.",
578        }),
579
580        // Math
581        "abs" => Some(BuiltinDoc {
582            signature: "abs VALUE\nabs",
583            description: "Returns the absolute value of its argument.",
584        }),
585        "int" => Some(BuiltinDoc {
586            signature: "int EXPR\nint",
587            description: "Returns the integer portion of EXPR (truncates toward zero).",
588        }),
589        "sqrt" => Some(BuiltinDoc {
590            signature: "sqrt EXPR\nsqrt",
591            description: "Returns the positive square root of EXPR.",
592        }),
593        "log" => Some(BuiltinDoc {
594            signature: "log EXPR\nlog",
595            description: "Returns the natural logarithm (base e) of EXPR.",
596        }),
597        "exp" => Some(BuiltinDoc {
598            signature: "exp EXPR\nexp",
599            description: "Returns e (the natural logarithm base) to the power of EXPR.",
600        }),
601        "sin" => Some(BuiltinDoc {
602            signature: "sin EXPR\nsin",
603            description: "Returns the sine of EXPR (expressed in radians).",
604        }),
605        "cos" => Some(BuiltinDoc {
606            signature: "cos EXPR\ncos",
607            description: "Returns the cosine of EXPR (expressed in radians).",
608        }),
609        "atan2" => Some(BuiltinDoc {
610            signature: "atan2 Y, X",
611            description: "Returns the arctangent of Y/X in the range -PI to PI.",
612        }),
613        "rand" => Some(BuiltinDoc {
614            signature: "rand EXPR\nrand",
615            description: "Returns a random fractional number greater than or equal to 0 and less than EXPR (default 1).",
616        }),
617        "srand" => Some(BuiltinDoc {
618            signature: "srand EXPR\nsrand",
619            description: "Sets the random number seed for the rand operator.",
620        }),
621
622        // File tests and operations
623        "stat" => Some(BuiltinDoc {
624            signature: "stat FILEHANDLE\nstat EXPR",
625            description: "Returns a 13-element list (dev, ino, mode, nlink, uid, gid, rdev, size, atime, mtime, ctime, blksize, blocks) or an empty list on failure.",
626        }),
627        "lstat" => Some(BuiltinDoc {
628            signature: "lstat FILEHANDLE\nlstat EXPR",
629            description: "Like stat, but if the last component of the filename is a symbolic link, stats the link itself.",
630        }),
631        "chmod" => Some(BuiltinDoc {
632            signature: "chmod MODE, LIST",
633            description: "Changes the permissions of a list of files. Returns the number of files successfully changed.",
634        }),
635        "chown" => Some(BuiltinDoc {
636            signature: "chown UID, GID, LIST",
637            description: "Changes the owner and group of a list of files.",
638        }),
639        "unlink" => Some(BuiltinDoc {
640            signature: "unlink LIST\nunlink",
641            description: "Deletes a list of files. Returns the number of files successfully deleted.",
642        }),
643        "rename" => Some(BuiltinDoc {
644            signature: "rename OLDNAME, NEWNAME",
645            description: "Renames a file. Returns true on success, false otherwise.",
646        }),
647        "mkdir" => Some(BuiltinDoc {
648            signature: "mkdir FILENAME, MODE\nmkdir FILENAME",
649            description: "Creates the directory specified by FILENAME. Returns true on success.",
650        }),
651        "rmdir" => Some(BuiltinDoc {
652            signature: "rmdir FILENAME\nrmdir",
653            description: "Deletes the directory if it is empty. Returns true on success.",
654        }),
655        "opendir" => Some(BuiltinDoc {
656            signature: "opendir DIRHANDLE, EXPR",
657            description: "Opens a directory for reading by readdir.",
658        }),
659        "readdir" => Some(BuiltinDoc {
660            signature: "readdir DIRHANDLE",
661            description: "Returns the next entry (or entries in list context) from the directory.",
662        }),
663        "closedir" => Some(BuiltinDoc {
664            signature: "closedir DIRHANDLE",
665            description: "Closes a directory opened by opendir.",
666        }),
667        "link" => Some(BuiltinDoc {
668            signature: "link OLDFILE, NEWFILE",
669            description: "Creates a new hard link for an existing file.",
670        }),
671        "symlink" => Some(BuiltinDoc {
672            signature: "symlink OLDFILE, NEWFILE",
673            description: "Creates a new symbolic link for an existing file.",
674        }),
675        "readlink" => Some(BuiltinDoc {
676            signature: "readlink EXPR\nreadlink",
677            description: "Returns the value of a symbolic link.",
678        }),
679        "chdir" => Some(BuiltinDoc {
680            signature: "chdir EXPR\nchdir",
681            description: "Changes the working directory to EXPR (or home directory if omitted).",
682        }),
683        "glob" => Some(BuiltinDoc {
684            signature: "glob EXPR\nglob",
685            description: "Returns the filenames matching the shell-style glob pattern EXPR.",
686        }),
687
688        // System/Process
689        "system" => Some(BuiltinDoc {
690            signature: "system LIST\nsystem PROGRAM LIST",
691            description: "Executes a system command and returns the exit status. The return value is the exit status of the program as returned by the wait call.",
692        }),
693        "exec" => Some(BuiltinDoc {
694            signature: "exec LIST\nexec PROGRAM LIST",
695            description: "Replaces the current process with an external command. Never returns on success.",
696        }),
697        "fork" => Some(BuiltinDoc {
698            signature: "fork",
699            description: "Creates a child process. Returns the child pid to the parent, 0 to the child, or undef on failure.",
700        }),
701        "wait" => Some(BuiltinDoc {
702            signature: "wait",
703            description: "Waits for a child process to terminate and returns the pid of the deceased process.",
704        }),
705        "waitpid" => Some(BuiltinDoc {
706            signature: "waitpid PID, FLAGS",
707            description: "Waits for a particular child process to terminate and returns the pid.",
708        }),
709        "kill" => Some(BuiltinDoc {
710            signature: "kill SIGNAL, LIST",
711            description: "Sends a signal to a list of processes. Returns the number of processes signalled.",
712        }),
713        "sleep" => Some(BuiltinDoc {
714            signature: "sleep EXPR\nsleep",
715            description: "Causes the script to sleep for EXPR seconds (or forever if no argument).",
716        }),
717        "alarm" => Some(BuiltinDoc {
718            signature: "alarm SECONDS\nalarm",
719            description: "Arranges to have a SIGALRM delivered after SECONDS seconds.",
720        }),
721
722        // Encoding/Decoding
723        "pack" => Some(BuiltinDoc {
724            signature: "pack TEMPLATE, LIST",
725            description: "Takes a list of values and packs it into a binary string according to TEMPLATE.",
726        }),
727        "unpack" => Some(BuiltinDoc {
728            signature: "unpack TEMPLATE, EXPR",
729            description: "Takes a binary string and expands it into a list of values according to TEMPLATE.",
730        }),
731        "crypt" => Some(BuiltinDoc {
732            signature: "crypt PLAINTEXT, SALT",
733            description: "Encrypts a string using the system crypt() function.",
734        }),
735
736        // Time
737        "time" => Some(BuiltinDoc {
738            signature: "time",
739            description: "Returns the number of seconds since the epoch (January 1, 1970 UTC).",
740        }),
741        "localtime" => Some(BuiltinDoc {
742            signature: "localtime EXPR\nlocaltime",
743            description: "Converts a time value to a 9-element list with the time analyzed for the local time zone. In scalar context returns a ctime(3) string.",
744        }),
745        "gmtime" => Some(BuiltinDoc {
746            signature: "gmtime EXPR\ngmtime",
747            description: "Like localtime but uses Greenwich Mean Time (UTC). In list context returns a 9-element time list (sec, min, hour, mday, mon, year, wday, yday, isdst). In scalar context returns a ctime(3)-style string.",
748        }),
749
750        // Misc
751        "prototype" => Some(BuiltinDoc {
752            signature: "prototype FUNCTION",
753            description: "Returns the prototype of a function as a string, or undef if the function has no prototype.",
754        }),
755        "local" => Some(BuiltinDoc {
756            signature: "local EXPR",
757            description: "Temporarily localizes the listed global variables to the enclosing block. The original values are restored at the end of the block.",
758        }),
759        "my" => Some(BuiltinDoc {
760            signature: "my VARLIST\nmy TYPE VARLIST",
761            description: "Declares lexically scoped variables. Variables are visible only within the enclosing block.",
762        }),
763        "our" => Some(BuiltinDoc {
764            signature: "our VARLIST",
765            description: "Declares package variables visible in the current lexical scope without qualifying the name.",
766        }),
767        "state" => Some(BuiltinDoc {
768            signature: "state VARLIST",
769            description: "Declares lexically scoped variables that persist across calls to the enclosing subroutine (like C static variables).",
770        }),
771        "BEGIN" => Some(BuiltinDoc {
772            signature: "BEGIN { BLOCK }",
773            description: "Executed at **compile time**, before the rest of the program runs. \
774                          Used to initialize modules, set up the symbol table, or run code \
775                          that must complete before compilation continues. Multiple BEGIN \
776                          blocks run in the order they appear in source.",
777        }),
778        "END" => Some(BuiltinDoc {
779            signature: "END { BLOCK }",
780            description: "Executed at **program exit**, after the main program finishes (including \
781                          `die` and `exit`). Used for cleanup. Multiple END blocks run in \
782                          reverse order of definition. `$?` holds the exit status.",
783        }),
784        "INIT" => Some(BuiltinDoc {
785            signature: "INIT { BLOCK }",
786            description: "Executed after compilation completes but **before** the main program \
787                          runs. Runs in first-seen order. Unlike BEGIN, INIT sees the fully \
788                          compiled symbol table.",
789        }),
790        "CHECK" => Some(BuiltinDoc {
791            signature: "CHECK { BLOCK }",
792            description: "Executed at the **end of compilation**, after all BEGIN blocks. Runs \
793                          in reverse order of definition. Used by modules that need to inspect \
794                          or modify the compiled program before it runs (e.g. B::* modules).",
795        }),
796        "UNITCHECK" => Some(BuiltinDoc {
797            signature: "UNITCHECK { BLOCK }",
798            description: "Executed at the **end of the compilation unit** that defined it \
799                          (file, string eval, or require). Runs in reverse order of definition \
800                          within that unit. More granular than CHECK — each required file's \
801                          UNITCHECK runs before the requiring file's UNITCHECK.",
802        }),
803
804        // utf8:: namespace - core Perl Unicode encoding/decoding functions.
805        // These are always available without `use utf8;`.  The `use utf8;`
806        // pragma controls source-file encoding, not the availability of these
807        // functions.  See `perldoc utf8` for the complete specification.
808        "utf8::encode" => Some(BuiltinDoc {
809            signature: "utf8::encode(SCALAR)",
810            description: "Converts `SCALAR` **in-place** from Perl's internal Unicode \
811                          representation into Perl's extended UTF-8 octets. Each character is \
812                          replaced by one or more byte-valued characters, and the UTF-8 flag is \
813                          turned **off**. The function returns nothing.\n\n\
814                          Use this when you need raw UTF-8 bytes for I/O or binary protocols.  \
815                          It is the inverse of `utf8::decode`.\n\n\
816                          **Example**: `utf8::encode($bytes); # $bytes is now raw UTF-8 octets`",
817        }),
818        "utf8::decode" => Some(BuiltinDoc {
819            signature: "utf8::decode(SCALAR)",
820            description: "Attempts to decode `SCALAR` **in-place** from Perl's extended UTF-8 \
821                          octets into the corresponding character sequence. Returns true on \
822                          success and false on malformed input; on failure the string is left \
823                          unchanged. The UTF-8 flag is turned on only when the decoded string \
824                          contains multibyte UTF-8 characters.\n\n\
825                          This is the inverse of `utf8::encode`.\n\n\
826                          **Example**: `if (utf8::decode($input)) { ... } # $input is now a character string`",
827        }),
828        "utf8::is_utf8" => Some(BuiltinDoc {
829            signature: "utf8::is_utf8(SCALAR)",
830            description: "Returns **true** if the UTF-8 flag is enabled for `SCALAR`, \
831                          false otherwise.  The UTF-8 flag indicates that Perl is treating \
832                          the string as a sequence of characters rather than octets.\n\n\
833                          **Note**: A false return does not mean the string is invalid; it only \
834                          means the flag is off.  **Do not use this for most application logic** \
835                          - prefer character semantics throughout and only inspect the flag for \
836                          debugging, tests, filenames, or low-level serialization boundaries.",
837        }),
838        "utf8::valid" => Some(BuiltinDoc {
839            signature: "utf8::valid(SCALAR)",
840            description: "Internal consistency check for Perl's UTF-8 string state. Returns \
841                          true when `SCALAR` is either stored as bytes or is well-formed Perl \
842                          extended UTF-8 with the UTF-8 flag on. It can return true for ordinary \
843                          byte strings, so it is **not** a general raw-input UTF-8 validator.\n\n\
844                          This routine mainly exists so Perl's own tests can assert that string \
845                          operations left a scalar in a consistent internal state. Use \
846                          `utf8::decode` or the `Encode` module when validating external bytes.",
847        }),
848        "utf8::upgrade" => Some(BuiltinDoc {
849            signature: "utf8::upgrade(SCALAR)",
850            description: "Converts `SCALAR` **in-place** from the platform native 8-bit encoding \
851                          (Latin-1 on ASCII platforms, EBCDIC on EBCDIC platforms) to Perl's \
852                          internal Unicode representation. This is a no-op if the string is \
853                          already upgraded. Returns the number of octets necessary to represent \
854                          the string as UTF-8.\n\n\
855                          `upgrade` and `downgrade` are inverses.  Use `upgrade` when you have \
856                          a native byte string and want Perl to treat it with Unicode semantics. \
857                          It does not decode arbitrary external encodings; use `Encode` for that.",
858        }),
859        "utf8::downgrade" => Some(BuiltinDoc {
860            signature: "utf8::downgrade(SCALAR)\nutf8::downgrade(SCALAR, FAIL_OK)",
861            description: "Converts `SCALAR` **in-place** from Perl's internal Unicode \
862                          representation to the equivalent octet sequence in the platform native \
863                          8-bit encoding, turning off the UTF-8 flag. Only succeeds if the string \
864                          can be represented in that native encoding.\n\n\
865                          If `FAIL_OK` is true, returns false on failure instead of dying.  \
866                          If `FAIL_OK` is omitted or false, dies with an error message on failure.\n\n\
867                          **Example**: `utf8::downgrade($str, 1) or die \"String has wide chars\";`",
868        }),
869        "utf8::native_to_unicode" => Some(BuiltinDoc {
870            signature: "utf8::native_to_unicode(CODEPOINT)",
871            description: "Returns the Unicode code point corresponding to the native platform \
872                          code point `CODEPOINT`.  On ASCII platforms (virtually all modern \
873                          systems) this is a no-op.  Useful for writing truly portable code \
874                          that runs correctly on both ASCII and EBCDIC platforms.",
875        }),
876        "utf8::unicode_to_native" => Some(BuiltinDoc {
877            signature: "utf8::unicode_to_native(CODEPOINT)",
878            description: "Returns the native platform code point corresponding to the Unicode \
879                          code point `CODEPOINT`.  On ASCII platforms (virtually all modern \
880                          systems) this is a no-op.  The inverse of `utf8::native_to_unicode`.",
881        }),
882
883        _ => None,
884    }
885}
886
887/// Get documentation for a Moose/Moo/Mouse built-in type constraint.
888///
889/// Accepts both bare types (`Str`, `ArrayRef`) and parametrized forms
890/// (`ArrayRef[Int]`, `Maybe[Str]`).  For parametrized forms the base
891/// type is extracted and used for the lookup.
892///
893/// Returns signature and description suitable for LSP hover display,
894/// or `None` if the type is not a known Moose built-in.
895pub fn get_moose_type_documentation(type_str: &str) -> Option<BuiltinDoc> {
896    // Strip optional parametrization: "ArrayRef[Int]" -> "ArrayRef"
897    let base = type_str.split('[').next().unwrap_or(type_str).trim();
898
899    match base {
900        // Moose::Util::TypeConstraints — Any / Item
901        "Any" => Some(BuiltinDoc {
902            signature: "Any",
903            description: "The root type. Every value passes this constraint.",
904        }),
905        "Item" => Some(BuiltinDoc {
906            signature: "Item",
907            description: "Synonym for Any. Used as a base for the type hierarchy.",
908        }),
909        // Undef / Defined
910        "Undef" => Some(BuiltinDoc { signature: "Undef", description: "Accepts only undef." }),
911        "Defined" => Some(BuiltinDoc {
912            signature: "Defined",
913            description: "Accepts any defined value (anything that is not undef).",
914        }),
915        // Value / Bool
916        "Value" => Some(BuiltinDoc {
917            signature: "Value",
918            description: "Accepts any defined, non-reference value (scalars and strings).",
919        }),
920        "Bool" => Some(BuiltinDoc {
921            signature: "Bool",
922            description: "Accepts 1, 0, the empty string '', or undef — Perl's boolean-ish values.",
923        }),
924        // Strings
925        "Str" => Some(BuiltinDoc {
926            signature: "Str",
927            description: "Accepts any defined, non-reference scalar value (a string or number).",
928        }),
929        "Num" => Some(BuiltinDoc {
930            signature: "Num",
931            description: "Accepts any value that looks like a number (integer or float).",
932        }),
933        "Int" => Some(BuiltinDoc {
934            signature: "Int",
935            description: "Accepts only integer values (no decimal point).",
936        }),
937        "ClassName" => Some(BuiltinDoc {
938            signature: "ClassName",
939            description: "Accepts a string that is the name of a loaded Perl package/class.",
940        }),
941        "RoleName" => Some(BuiltinDoc {
942            signature: "RoleName",
943            description: "Accepts a string that is the name of a loaded Moose role.",
944        }),
945        // References
946        "Ref" => Some(BuiltinDoc { signature: "Ref", description: "Accepts any reference." }),
947        "ScalarRef" => Some(BuiltinDoc {
948            signature: "ScalarRef[TYPE]",
949            description: "Accepts a scalar reference. Optionally parametrized: ScalarRef[Int] requires the referent to satisfy Int.",
950        }),
951        "ArrayRef" => Some(BuiltinDoc {
952            signature: "ArrayRef[TYPE]",
953            description: "Accepts an array reference. Optionally parametrized: ArrayRef[Int] requires all elements to satisfy Int.",
954        }),
955        "HashRef" => Some(BuiltinDoc {
956            signature: "HashRef[TYPE]",
957            description: "Accepts a hash reference. Optionally parametrized: HashRef[Str] requires all values to satisfy Str.",
958        }),
959        "CodeRef" => Some(BuiltinDoc {
960            signature: "CodeRef",
961            description: "Accepts a code reference (subroutine reference).",
962        }),
963        "RegexpRef" => Some(BuiltinDoc {
964            signature: "RegexpRef",
965            description: "Accepts a compiled regular expression reference (qr//).",
966        }),
967        "GlobRef" => {
968            Some(BuiltinDoc { signature: "GlobRef", description: "Accepts a glob reference." })
969        }
970        "FileHandle" => Some(BuiltinDoc {
971            signature: "FileHandle",
972            description: "Accepts an IO object or a glob reference that can be used as a filehandle.",
973        }),
974        // Object / Role
975        "Object" => Some(BuiltinDoc {
976            signature: "Object",
977            description: "Accepts any blessed reference (an object).",
978        }),
979        // Maybe
980        "Maybe" => Some(BuiltinDoc {
981            signature: "Maybe[TYPE]",
982            description: "Accepts undef or any value satisfying TYPE. Useful for optional attributes: Maybe[Str] accepts either a string or undef.",
983        }),
984        // Type::Tiny extras commonly used with Moo
985        "InstanceOf" => Some(BuiltinDoc {
986            signature: "InstanceOf[CLASSNAME]",
987            description: "Accepts a blessed object that is an instance of CLASSNAME.",
988        }),
989        "ConsumerOf" => Some(BuiltinDoc {
990            signature: "ConsumerOf[ROLENAME]",
991            description: "Accepts a blessed object that consumes ROLENAME.",
992        }),
993        "HasMethods" => Some(BuiltinDoc {
994            signature: "HasMethods[METHOD, ...]",
995            description: "Accepts a blessed object that has all the listed methods.",
996        }),
997        "Dict" => Some(BuiltinDoc {
998            signature: "Dict[KEY => TYPE, ...]",
999            description: "Accepts a hash reference matching a specific key/type schema (Type::Tiny).",
1000        }),
1001        "Tuple" => Some(BuiltinDoc {
1002            signature: "Tuple[TYPE, ...]",
1003            description: "Accepts an array reference matching a specific positional type schema (Type::Tiny).",
1004        }),
1005        "Map" => Some(BuiltinDoc {
1006            signature: "Map[KEYTYPE, VALUETYPE]",
1007            description: "Accepts a hash reference where keys satisfy KEYTYPE and values satisfy VALUETYPE (Type::Tiny).",
1008        }),
1009        "Enum" => Some(BuiltinDoc {
1010            signature: "Enum[VALUE, ...]",
1011            description: "Accepts a string that is one of the listed values (Type::Tiny).",
1012        }),
1013
1014        _ => None,
1015    }
1016}
1017
1018/// Get documentation for a Perl subroutine or variable attribute.
1019///
1020/// Attributes are declared with `:name` syntax, e.g. `sub foo :lvalue { ... }`.
1021/// Pass the attribute name without the leading colon.
1022///
1023/// Returns signature and description suitable for LSP hover display,
1024/// or `None` if the attribute is not a known built-in.
1025pub fn get_attribute_documentation(attr: &str) -> Option<BuiltinDoc> {
1026    // Strip leading colon if present
1027    let name = attr.trim_start_matches(':');
1028
1029    match name {
1030        "lvalue" => Some(BuiltinDoc {
1031            signature: ":lvalue",
1032            description: "Marks a subroutine as an lvalue subroutine. The return value can be assigned to, enabling constructs like `foo() = 42;`.",
1033        }),
1034        "method" => Some(BuiltinDoc {
1035            signature: ":method",
1036            description: "Marks a subroutine as a method. Used by some attribute handlers to modify dispatch or prototype checking.",
1037        }),
1038        "prototype" => Some(BuiltinDoc {
1039            signature: ":prototype(PROTO)",
1040            description: "Sets the prototype of a subroutine. Controls how Perl parses calls to the sub (e.g. `prototype($$)` for two scalar args).",
1041        }),
1042        "const" => Some(BuiltinDoc {
1043            signature: ":const",
1044            description: "Marks a subroutine as a constant. The value is computed once and cached; subsequent calls return the cached value immutably.",
1045        }),
1046        "shared" => Some(BuiltinDoc {
1047            signature: ":shared",
1048            description: "Marks a variable or subroutine as shared across threads (requires `threads::shared`). The variable is accessible from all threads.",
1049        }),
1050        "weak_ref" => Some(BuiltinDoc {
1051            signature: ":weak_ref",
1052            description: "Marks a Moose/Moo attribute as a weak reference. The stored reference will not prevent the referent from being garbage-collected.",
1053        }),
1054        "locked" => Some(BuiltinDoc {
1055            signature: ":locked",
1056            description: "Marks a subroutine so concurrent callers are serialized. Useful for thread-safe methods that must not run at the same time.",
1057        }),
1058        "overload" => Some(BuiltinDoc {
1059            signature: ":overload(OP)",
1060            description: "Declares that a subroutine implements an operator overload for OP.",
1061        }),
1062        _ => None,
1063    }
1064}
1065
/// Structured exception context for exception-family functions.
///
/// Used by code actions and semantic analysis to understand exception
/// handling semantics — upgrade paths (die → croak) and associated
/// error variables.
///
/// Implements `PartialEq`/`Eq` so whole contexts can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExceptionContext {
    /// Special variable that captures the exception after an eval block (e.g. `$@`).
    pub error_variable: Option<String>,
    /// Recommended replacement function, if the current function is not preferred
    /// (e.g. `die` → `Carp::croak`, `warn` → `Carp::carp`).
    pub preferred_alternative: Option<String>,
}

/// Resolve `name` to the bare exception-family function it refers to.
///
/// Accepts the bare names as well as their qualified spellings: the `CORE::`
/// prefix is honoured only for the built-ins `die` and `warn`, and the
/// `Carp::` prefix only for `croak`, `carp`, `confess` and `cluck`.
/// Returns `None` for everything else, including mismatched pairs such as
/// `Carp::die` or `CORE::croak`.
fn canonical_exception_name(name: &str) -> Option<&str> {
    let (bare, may_be_builtin, may_be_carp) = if let Some(rest) = name.strip_prefix("CORE::") {
        (rest, true, false)
    } else if let Some(rest) = name.strip_prefix("Carp::") {
        (rest, false, true)
    } else {
        (name, true, true)
    };

    match bare {
        "die" | "warn" if may_be_builtin => Some(bare),
        "croak" | "carp" | "confess" | "cluck" if may_be_carp => Some(bare),
        _ => None,
    }
}

/// Check if a function name is in the Perl exception family.
///
/// Returns `true` for: `die`, `warn`, `croak`, `carp`, `confess`, `cluck`,
/// plus the qualified spellings `CORE::die`, `CORE::warn` and
/// `Carp::croak` / `Carp::carp` / `Carp::confess` / `Carp::cluck`. Accepting
/// the `Carp::` forms keeps this in step with [`get_exception_context`],
/// whose `preferred_alternative` values are written that way.
///
/// This is a classification helper for future diagnostic and code-action use.
/// It is not currently called from any LSP code path — callers may use it to
/// decide whether to invoke [`get_exception_context`].
///
/// # Examples
/// ```
/// use perl_semantic_analyzer::analysis::semantic::is_exception_function;
///
/// assert!(is_exception_function("die"));
/// assert!(is_exception_function("croak"));
/// assert!(is_exception_function("Carp::croak"));
/// assert!(is_exception_function("CORE::die"));
/// assert!(!is_exception_function("print"));
/// ```
pub fn is_exception_function(name: &str) -> bool {
    canonical_exception_name(name).is_some()
}

/// Get exception context for upgrade suggestions and error variables.
///
/// Returns metadata about exception handling semantics:
/// - `error_variable`: special variable capturing the exception (`$@`)
/// - `preferred_alternative`: recommended upgrade path (`die` → `Carp::croak`)
///
/// Qualified spellings (`CORE::die`, `Carp::croak`, ...) yield the same
/// context as the corresponding bare name, so every name accepted by
/// [`is_exception_function`] has a context here.
///
/// Returns `None` for non-exception functions (e.g. `eval`, `print`).
///
/// # Examples
/// ```
/// use perl_semantic_analyzer::analysis::semantic::get_exception_context;
///
/// let die_ctx = get_exception_context("die").unwrap();
/// assert_eq!(die_ctx.error_variable, Some("$@".to_string()));
/// assert_eq!(die_ctx.preferred_alternative, Some("Carp::croak".to_string()));
///
/// let croak_ctx = get_exception_context("croak").unwrap();
/// assert_eq!(croak_ctx.preferred_alternative, None);  // already preferred
/// ```
pub fn get_exception_context(name: &str) -> Option<ExceptionContext> {
    match canonical_exception_name(name)? {
        "die" => Some(ExceptionContext {
            error_variable: Some("$@".to_string()),
            preferred_alternative: Some("Carp::croak".to_string()),
        }),
        "warn" => Some(ExceptionContext {
            error_variable: None,
            preferred_alternative: Some("Carp::carp".to_string()),
        }),
        "croak" | "confess" => Some(ExceptionContext {
            error_variable: Some("$@".to_string()),
            preferred_alternative: None,
        }),
        "carp" | "cluck" => {
            Some(ExceptionContext { error_variable: None, preferred_alternative: None })
        }
        // `canonical_exception_name` only yields the names handled above.
        _ => None,
    }
}
1139
/// Documentation entry for a Perl pragma.
///
/// Provides a brief summary and description for display in hover tooltips
/// when a user hovers over a `use strict;`, `use warnings;`, etc. statement.
///
/// Every field is a `'static` string slice, so the entry is cheap to copy
/// and can be compared or debug-printed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PragmaDoc {
    /// One-line purpose summary
    pub summary: &'static str,
    /// Detailed description of what the pragma enables or does
    pub description: &'static str,
    /// Minimum Perl version required, if any (e.g. `"v5.10"`)
    pub version_required: Option<&'static str>,
}
1152
1153/// Get hover documentation for a Perl pragma.
1154///
1155/// Returns a [`PragmaDoc`] for known pragmas used in `use <pragma>` or `no <pragma>`
1156/// statements, or `None` if the name is not a recognized pragma.
1157///
1158/// Pragmas covered: `strict`, `warnings`, `utf8`, `feature`, `constant`, `vars`,
1159/// `autodie`, `encoding`, `locale`, `parent`, `base`, `lib`, `Exporter`.
1160///
1161/// Version pragmas (`v5.36`, `5.036`, etc.) are detected by
1162/// [`crate::analysis::pragma::parse_perl_version`] separately.
1163pub fn get_pragma_documentation(name: &str) -> Option<PragmaDoc> {
1164    match name {
1165        "strict" => Some(PragmaDoc {
1166            summary: "Enable strict variable/subroutine/reference checking",
1167            description: "Restricts unsafe Perl constructs. Enables compile-time errors for \
1168                undeclared variables (`vars`), bareword subroutine names (`subs`), and symbolic \
1169                references (`refs`). Use `use strict;` to enable all three categories at once, \
1170                or `use strict 'vars'` for individual categories.\n\n\
1171                **Common usage**: Always include `use strict;` at the top of every Perl file.",
1172            version_required: None,
1173        }),
1174        "warnings" => Some(PragmaDoc {
1175            summary: "Enable runtime and compile-time warnings",
1176            description: "Enables a wide range of optional warnings about potentially dangerous \
1177                or deprecated code patterns. Categories include: `numeric`, `uninitialized`, \
1178                `deprecated`, `syntax`, `misc`, and many more.\n\n\
1179                Use `use warnings;` to enable all warnings, or `use warnings 'uninitialized'` \
1180                for specific categories. Use `no warnings 'once'` to suppress individual categories.\n\n\
1181                **Common usage**: Always pair with `use strict;`.",
1182            version_required: None,
1183        }),
1184        "utf8" => Some(PragmaDoc {
1185            summary: "Treat the source file as UTF-8 encoded",
1186            description: "Tells the Perl parser that the source code is encoded in UTF-8. \
1187                Allows Unicode identifiers, string literals, and comments in the source file. \
1188                Does **not** affect how STDIN/STDOUT/STDERR handle encoding — use \
1189                `binmode(STDOUT, ':utf8')` or `open` with `:utf8` layer for that.\n\n\
1190                **Common usage**: `use utf8;` at the top of files with non-ASCII identifiers \
1191                or string constants.",
1192            version_required: Some("v5.6"),
1193        }),
1194        "feature" => Some(PragmaDoc {
1195            summary: "Enable experimental or version-specific Perl features",
1196            description: "Enables named language features that are off by default. Key features:\n\
1197                - `say` — like `print` but appends a newline (v5.10+)\n\
1198                - `state` — persistent lexical variables (v5.10+)\n\
1199                - `signatures` — formal subroutine signatures (stable v5.36+)\n\
1200                - `try` — `try`/`catch` exception handling (experimental v5.34+)\n\
1201                - `class` — native OO with `class`/`method`/`field` (v5.38+)\n\
1202                - `defer` — `defer` blocks run at scope exit (v5.36+)\n\n\
1203                Features are also enabled implicitly by `use v5.XX;` version declarations.\n\n\
1204                **Example**: `use feature 'say', 'state';`",
1205            version_required: Some("v5.10"),
1206        }),
1207        "constant" => Some(PragmaDoc {
1208            summary: "Declare compile-time constants",
1209            description: "Creates named constants that are inlined at compile time, making them \
1210                more efficient than regular variables and preventing accidental reassignment.\n\n\
1211                **Single constant**: `use constant PI => 3.14159;`\n\
1212                **Multiple constants**: `use constant { MAX => 100, MIN => 0 };`\n\n\
1213                Constants are accessed without a sigil: `print PI;` or `print MAX;`.\n\
1214                They cannot be interpolated directly in strings — use `@{[PI]}` as a workaround.",
1215            version_required: None,
1216        }),
1217        "vars" => Some(PragmaDoc {
1218            summary: "Pre-declare package (global) variables",
1219            description: "Pre-declares package global variables so they can be used under \
1220                `use strict 'vars'` without a full package-qualified name. This is a legacy \
1221                pragma — prefer `our $var;` in modern code.\n\n\
1222                **Example**: `use vars qw($VERSION @EXPORT);`\n\n\
1223                **Modern alternative**: `our $VERSION; our @EXPORT;`",
1224            version_required: None,
1225        }),
1226        "autodie" => Some(PragmaDoc {
1227            summary: "Automatic exception throwing on system call failures",
1228            description: "Replaces built-in functions (`open`, `close`, `read`, `write`, \
1229                `system`, `exec`, etc.) with versions that automatically `die` on failure, \
1230                eliminating boilerplate `or die` checks.\n\n\
1231                **Example**: `use autodie;` — all builtins now throw on error.\n\
1232                **Selective**: `use autodie qw(open close);` — only specified functions.\n\n\
1233                Exceptions are `autodie::exception` objects with detailed failure information.",
1234            version_required: Some("v5.10.1"),
1235        }),
1236        "encoding" => Some(PragmaDoc {
1237            summary: "Set source encoding (legacy — prefer utf8 pragma)",
1238            description: "Specifies the character encoding of the Perl source file and optionally \
1239                sets default I/O encoding. This pragma is **deprecated** — prefer `use utf8;` \
1240                for source encoding.\n\n\
1241                **Example**: `use encoding 'utf8';`\n\n\
1242                **Preferred alternative**: `use utf8;` for source + explicit `binmode` calls \
1243                for I/O encoding.",
1244            version_required: Some("v5.6"),
1245        }),
1246        "locale" => Some(PragmaDoc {
1247            summary: "Enable locale-aware string operations",
1248            description: "Makes string comparisons, case conversion, and character classification \
1249                functions use the current system locale settings (LC_CTYPE, LC_COLLATE, etc.).\n\n\
1250                **Note**: Locale handling can cause subtle encoding issues. Prefer Unicode \
1251                semantics with `use utf8;` and `use feature 'unicode_strings';` where possible.\n\n\
1252                **Example**: `use locale;`",
1253            version_required: None,
1254        }),
1255        "parent" => Some(PragmaDoc {
1256            summary: "Establish ISA relationship with parent classes",
1257            description: "Sets up inheritance by loading the listed modules and pushing them \
1258                into `@ISA`. The modern replacement for `use base`.\n\n\
1259                **Example**: `use parent 'Animal';` or `use parent qw(Animal Printable);`\n\n\
1260                Unlike `use base`, `parent` always `require`s the parent modules and does not \
1261                set `$VERSION` or `@EXPORT` by default.",
1262            version_required: Some("v5.10.1"),
1263        }),
1264        "base" => Some(PragmaDoc {
1265            summary: "Establish ISA relationship (legacy — prefer parent pragma)",
1266            description: "Sets up inheritance by loading parent modules and updating `@ISA`. \
1267                Legacy alternative to `use parent`.\n\n\
1268                **Example**: `use base 'Animal';` or `use base qw(Animal Printable);`\n\n\
1269                **Preferred alternative**: `use parent qw(...);` — cleaner semantics \
1270                without the `$VERSION`/`@EXPORT` side-effects of `use base`.",
1271            version_required: None,
1272        }),
1273        "lib" => Some(PragmaDoc {
1274            summary: "Add directories to @INC at compile time",
1275            description: "Prepends directories to `@INC` so that subsequent `use` and `require` \
1276                statements can find modules there.\n\n\
1277                **Example**: `use lib '/path/to/modules';`\n\
1278                **Relative path**: `use lib 'lib';` — adds `./lib` to `@INC`.\n\n\
1279                Often used in test files: `use lib 't/lib';`",
1280            version_required: None,
1281        }),
1282        "Exporter" => Some(PragmaDoc {
1283            summary: "Default symbol exporter for Perl modules",
1284            description: "Provides the standard mechanism for modules to export symbols into \
1285                the caller's namespace. Configure `@EXPORT` and `@EXPORT_OK` to control \
1286                what gets exported.\n\n\
1287                **Typical usage**:\n\
1288                ```perl\n\
1289                use Exporter 'import';\n\
1290                our @EXPORT_OK = qw(helper_fn);\n\
1291                ```\n\n\
1292                Or via inheritance: `use parent 'Exporter';`",
1293            version_required: None,
1294        }),
1295        _ => None,
1296    }
1297}
1298
#[cfg(test)]
mod tests {
    use super::{
        get_builtin_documentation, get_pragma_documentation, is_builtin_function,
        is_control_keyword,
    };

    /// Result type shared by the fallible tests in this module.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Unwraps a documentation lookup, turning a missing entry into a test
    /// error whose message is `missing`.
    fn documented<T>(
        lookup: Option<T>,
        missing: &'static str,
    ) -> Result<T, Box<dyn std::error::Error>> {
        lookup.ok_or_else(|| missing.into())
    }

    // -- phase-block and CORE:: builtin documentation tests ----------------------

    /// `BEGIN` docs must mention compile time (either spelling).
    #[test]
    fn test_get_builtin_documentation_begin() -> TestResult {
        let entry = documented(get_builtin_documentation("BEGIN"), "BEGIN should have docs")?;
        let description = entry.description;
        let mentions_compile_time =
            description.contains("compile time") || description.contains("compile-time");
        assert!(
            mentions_compile_time,
            "BEGIN doc should mention compile time, got: {}",
            description
        );
        Ok(())
    }

    /// `END` docs must mention program exit or cleanup.
    #[test]
    fn test_get_builtin_documentation_end() -> TestResult {
        let entry = documented(get_builtin_documentation("END"), "END should have docs")?;
        let description = entry.description;
        let mentions_exit_or_cleanup =
            description.contains("exit") || description.contains("cleanup");
        assert!(
            mentions_exit_or_cleanup,
            "END doc should mention exit or cleanup, got: {}",
            description
        );
        Ok(())
    }

    /// `CHECK` docs must mention compilation.
    #[test]
    fn test_get_builtin_documentation_check() -> TestResult {
        let entry = documented(get_builtin_documentation("CHECK"), "CHECK should have docs")?;
        let description = entry.description;
        let mentions_compilation =
            description.contains("compilation") || description.contains("compile");
        assert!(
            mentions_compilation,
            "CHECK doc should mention compilation, got: {}",
            description
        );
        Ok(())
    }

    /// `INIT` docs must place the block relative to compilation.
    #[test]
    fn test_get_builtin_documentation_init() -> TestResult {
        let entry = documented(get_builtin_documentation("INIT"), "INIT should have docs")?;
        let description = entry.description;
        let mentions_post_compile =
            description.contains("compilation") || description.contains("before");
        assert!(
            mentions_post_compile,
            "INIT doc should mention post-compile execution, got: {}",
            description
        );
        Ok(())
    }

    /// `UNITCHECK` docs must mention the (compilation) unit.
    #[test]
    fn test_get_builtin_documentation_unitcheck() -> TestResult {
        let entry =
            documented(get_builtin_documentation("UNITCHECK"), "UNITCHECK should have docs")?;
        let description = entry.description;
        // "unit" is a substring of "compilation unit", so this single check accepts
        // exactly the same descriptions as testing for either phrase.
        let mentions_unit = description.contains("unit");
        assert!(
            mentions_unit,
            "UNITCHECK doc should mention compilation unit scope, got: {}",
            description
        );
        Ok(())
    }

    /// A `CORE::` prefix must not defeat classification or documentation lookup.
    #[test]
    fn test_core_prefixed_builtin_lookups() -> TestResult {
        assert!(is_builtin_function("CORE::length"), "CORE::length should be recognized");
        assert!(is_control_keyword("CORE::die"), "CORE::die should be recognized as control");

        let entry =
            documented(get_builtin_documentation("CORE::length"), "CORE::length should have docs")?;
        let signature = entry.signature;
        assert!(
            signature.contains("length"),
            "CORE::length should resolve to length docs, got: {}",
            signature
        );
        Ok(())
    }

    // -- pragma documentation tests ----------------------------------------------

    /// `strict` docs must talk about strictness or variables, and the summary
    /// must name the pragma.
    #[test]
    fn test_get_pragma_documentation_strict() -> TestResult {
        let entry = documented(get_pragma_documentation("strict"), "strict should have docs")?;
        let (summary, description) = (entry.summary, entry.description);
        let describes_checking = description.contains("strict") || description.contains("variable");
        assert!(
            describes_checking,
            "strict doc should describe variable checking, got: {}",
            description
        );
        assert!(
            summary.contains("strict"),
            "strict summary should mention strict, got: {}",
            summary
        );
        Ok(())
    }

    /// `warnings` docs must mention warnings.
    #[test]
    fn test_get_pragma_documentation_warnings() -> TestResult {
        let entry = documented(get_pragma_documentation("warnings"), "warnings should have docs")?;
        let description = entry.description;
        assert!(
            description.contains("warning"),
            "warnings doc should describe warnings, got: {}",
            description
        );
        Ok(())
    }

    /// `utf8` docs must mention UTF-8 or Unicode and report v5.6 as the minimum version.
    #[test]
    fn test_get_pragma_documentation_utf8() -> TestResult {
        let entry = documented(get_pragma_documentation("utf8"), "utf8 should have docs")?;
        let description = entry.description;
        let required = entry.version_required;
        let mentions_encoding = description.contains("UTF-8") || description.contains("Unicode");
        assert!(
            mentions_encoding,
            "utf8 doc should mention UTF-8 or Unicode, got: {}",
            description
        );
        assert_eq!(required, Some("v5.6"), "utf8 requires v5.6, got: {:?}", required);
        Ok(())
    }

    /// `feature` docs must mention a feature and carry a version requirement.
    #[test]
    fn test_get_pragma_documentation_feature() -> TestResult {
        let entry = documented(get_pragma_documentation("feature"), "feature should have docs")?;
        let description = entry.description;
        let mentions_features = description.contains("say") || description.contains("feature");
        assert!(
            mentions_features,
            "feature doc should mention specific features, got: {}",
            description
        );
        let has_version = entry.version_required.is_some();
        assert!(has_version, "feature pragma should have a version requirement");
        Ok(())
    }

    /// `constant` docs must mention constants or compile time.
    #[test]
    fn test_get_pragma_documentation_constant() -> TestResult {
        let entry = documented(get_pragma_documentation("constant"), "constant should have docs")?;
        let description = entry.description;
        let mentions_constants = description.contains("constant") || description.contains("compile");
        assert!(
            mentions_constants,
            "constant doc should mention constants, got: {}",
            description
        );
        Ok(())
    }

    /// `autodie` docs must mention `die` or exceptions.
    #[test]
    fn test_get_pragma_documentation_autodie() -> TestResult {
        let entry = documented(get_pragma_documentation("autodie"), "autodie should have docs")?;
        let description = entry.description;
        let mentions_failure = description.contains("die") || description.contains("exception");
        assert!(
            mentions_failure,
            "autodie doc should mention die or exceptions, got: {}",
            description
        );
        Ok(())
    }

    /// `parent` docs must mention `@ISA` or inheritance.
    #[test]
    fn test_get_pragma_documentation_parent() -> TestResult {
        let entry = documented(get_pragma_documentation("parent"), "parent should have docs")?;
        let description = entry.description;
        let mentions_inheritance = description.contains("ISA") || description.contains("inherit");
        assert!(
            mentions_inheritance,
            "parent doc should mention ISA or inheritance, got: {}",
            description
        );
        Ok(())
    }

    /// Names without a pragma documentation entry must yield `None`.
    #[test]
    fn test_get_pragma_documentation_unknown_returns_none() {
        let undocumented = [
            ("SomeArbitraryModule", "Non-pragma module should return None"),
            ("Moose", "Moose is not a pragma and should return None"),
        ];
        for &(module, reason) in &undocumented {
            assert!(get_pragma_documentation(module).is_none(), "{}", reason);
        }
    }

    // -- utf8:: function documentation tests -------------------------------------

    /// `utf8::encode` docs must name the function, mention UTF-8 or octets, and
    /// state that the conversion happens in place.
    #[test]
    fn test_utf8_encode_has_docs() -> TestResult {
        let entry = documented(
            get_builtin_documentation("utf8::encode"),
            "utf8::encode should have hover docs",
        )?;
        let (signature, description) = (entry.signature, entry.description);
        assert!(
            signature.contains("utf8::encode"),
            "utf8::encode signature should include the function name, got: {}",
            signature
        );

        let mentions_encoding = description.contains("UTF-8") || description.contains("octet");
        assert!(
            mentions_encoding,
            "utf8::encode description should mention UTF-8 encoding, got: {}",
            description
        );

        // Case-insensitive match; lowercase once and reuse it for both spellings.
        let lowered = description.to_lowercase();
        let mentions_in_place = lowered.contains("in-place") || lowered.contains("in place");
        assert!(
            mentions_in_place,
            "utf8::encode description should mention in-place mutation, got: {}",
            description
        );
        Ok(())
    }

    /// `utf8::decode` docs must name the function and mention UTF-8 or Unicode.
    #[test]
    fn test_utf8_decode_has_docs() -> TestResult {
        let entry = documented(
            get_builtin_documentation("utf8::decode"),
            "utf8::decode should have hover docs",
        )?;
        let (signature, description) = (entry.signature, entry.description);
        assert!(
            signature.contains("utf8::decode"),
            "utf8::decode signature should include the function name, got: {}",
            signature
        );
        let mentions_encoding = description.contains("UTF-8") || description.contains("Unicode");
        assert!(
            mentions_encoding,
            "utf8::decode description should mention UTF-8 or Unicode, got: {}",
            description
        );
        Ok(())
    }

    /// `utf8::is_utf8` docs must name the function and mention the flag or UTF-8.
    #[test]
    fn test_utf8_is_utf8_has_docs() -> TestResult {
        let entry = documented(
            get_builtin_documentation("utf8::is_utf8"),
            "utf8::is_utf8 should have hover docs",
        )?;
        let (signature, description) = (entry.signature, entry.description);
        assert!(
            signature.contains("utf8::is_utf8"),
            "utf8::is_utf8 signature should include the function name, got: {}",
            signature
        );
        let mentions_flag = description.contains("flag") || description.contains("UTF-8");
        assert!(
            mentions_flag,
            "utf8::is_utf8 description should mention the UTF-8 flag, got: {}",
            description
        );
        Ok(())
    }

    /// `utf8::valid` docs must describe an internal consistency check and must
    /// explicitly disclaim being a raw-input UTF-8 validator.
    #[test]
    fn test_utf8_valid_has_docs() -> TestResult {
        let entry = documented(
            get_builtin_documentation("utf8::valid"),
            "utf8::valid should have hover docs",
        )?;
        let (signature, description) = (entry.signature, entry.description);
        assert!(
            signature.contains("utf8::valid"),
            "utf8::valid signature should include the function name, got: {}",
            signature
        );

        let describes_consistency_check = description.contains("Internal consistency")
            || description.contains("consistent internal state");
        assert!(
            describes_consistency_check,
            "utf8::valid description should describe the internal consistency check, got: {}",
            description
        );

        let disclaims_input_validation =
            description.contains("not") && description.contains("raw-input UTF-8 validator");
        assert!(
            disclaims_input_validation,
            "utf8::valid description should avoid presenting it as a raw input validator, got: {}",
            description
        );
        Ok(())
    }

    /// `utf8::upgrade` docs must name the function and mention Unicode or the flag.
    #[test]
    fn test_utf8_upgrade_has_docs() -> TestResult {
        let entry = documented(
            get_builtin_documentation("utf8::upgrade"),
            "utf8::upgrade should have hover docs",
        )?;
        let (signature, description) = (entry.signature, entry.description);
        assert!(
            signature.contains("utf8::upgrade"),
            "utf8::upgrade signature should include the function name, got: {}",
            signature
        );
        let mentions_representation = description.contains("Unicode") || description.contains("flag");
        assert!(
            mentions_representation,
            "utf8::upgrade description should mention Unicode representation, got: {}",
            description
        );
        Ok(())
    }

    /// `utf8::downgrade` docs must name the function, show the optional
    /// `FAIL_OK` parameter, and mention bytes or octets.
    #[test]
    fn test_utf8_downgrade_has_docs() -> TestResult {
        let entry = documented(
            get_builtin_documentation("utf8::downgrade"),
            "utf8::downgrade should have hover docs",
        )?;
        let (signature, description) = (entry.signature, entry.description);
        assert!(
            signature.contains("utf8::downgrade"),
            "utf8::downgrade signature should include the function name, got: {}",
            signature
        );
        assert!(
            signature.contains("FAIL_OK"),
            "utf8::downgrade signature should include the optional FAIL_OK parameter, got: {}",
            signature
        );
        let mentions_bytes = description.contains("byte") || description.contains("octet");
        assert!(
            mentions_bytes,
            "utf8::downgrade description should mention byte conversion, got: {}",
            description
        );
        Ok(())
    }

    /// `utf8::native_to_unicode` docs must name the function and mention
    /// Unicode or code points.
    #[test]
    fn test_utf8_native_to_unicode_has_docs() -> TestResult {
        let entry = documented(
            get_builtin_documentation("utf8::native_to_unicode"),
            "utf8::native_to_unicode should have hover docs",
        )?;
        let (signature, description) = (entry.signature, entry.description);
        assert!(
            signature.contains("utf8::native_to_unicode"),
            "utf8::native_to_unicode signature should include the function name, got: {}",
            signature
        );
        let mentions_code_points =
            description.contains("Unicode") || description.contains("code point");
        assert!(
            mentions_code_points,
            "utf8::native_to_unicode description should mention Unicode code points, got: {}",
            description
        );
        Ok(())
    }

    /// `utf8::unicode_to_native` docs must name the function and mention
    /// Unicode or the native encoding.
    #[test]
    fn test_utf8_unicode_to_native_has_docs() -> TestResult {
        let entry = documented(
            get_builtin_documentation("utf8::unicode_to_native"),
            "utf8::unicode_to_native should have hover docs",
        )?;
        let (signature, description) = (entry.signature, entry.description);
        assert!(
            signature.contains("utf8::unicode_to_native"),
            "utf8::unicode_to_native signature should include the function name, got: {}",
            signature
        );
        let mentions_either_side = description.contains("Unicode") || description.contains("native");
        assert!(
            mentions_either_side,
            "utf8::unicode_to_native description should mention Unicode or native, got: {}",
            description
        );
        Ok(())
    }

    /// Every `utf8::` function named below must resolve to a hover doc entry.
    #[test]
    fn test_all_utf8_builtin_functions_have_docs() {
        // This list is maintained by hand and is intended to mirror the utf8::
        // names in the PHF lookup table (defined outside this module). A function
        // added to that table is only guarded by this test once its name is also
        // added here.
        const UTF8_FUNCTIONS: [&str; 8] = [
            "utf8::encode",
            "utf8::decode",
            "utf8::is_utf8",
            "utf8::valid",
            "utf8::upgrade",
            "utf8::downgrade",
            "utf8::native_to_unicode",
            "utf8::unicode_to_native",
        ];
        for func in &UTF8_FUNCTIONS {
            assert!(
                get_builtin_documentation(func).is_some(),
                "utf8 builtin function '{func}' is registered in the PHF table but has no hover doc"
            );
        }
    }
}