use std::env;
use std::fs;
use std::path::PathBuf;
/// Build-script entry point.
///
/// Reads `src/builtins.rs`, `src/parser.rs` and `src/lsp.rs`, extracts the
/// builtin dispatch arms, the per-name categories and one-line descriptions,
/// and writes them as `pub(crate) const` tables to `$OUT_DIR/reflection.rs`.
///
/// Panics if any of the three sources cannot be read, if `OUT_DIR` is unset,
/// or if the output file cannot be written.
fn main() {
    use std::collections::{HashMap, HashSet};

    /// Appends one `&[(&str, &str)]` table named `ident` to `buf`; the text
    /// ends with `];\n` so the caller decides on any blank separator line.
    fn push_pair_table(buf: &mut String, ident: &str, rows: &[(String, String)]) {
        buf.push_str(&format!("pub(crate) const {}: &[(&str, &str)] = &[\n", ident));
        for (key, value) in rows {
            buf.push_str(&format!(" ({:?}, {:?}),\n", key, value));
        }
        buf.push_str("];\n");
    }

    for tracked in ["src/builtins.rs", "src/parser.rs", "src/lsp.rs", "build.rs"] {
        println!("cargo:rerun-if-changed={}", tracked);
    }

    let builtins_src = fs::read_to_string("src/builtins.rs").expect("read src/builtins.rs");
    let parser_src = fs::read_to_string("src/parser.rs").expect("read src/parser.rs");
    let lsp_src = fs::read_to_string("src/lsp.rs").expect("read src/lsp.rs");

    let arms = extract_try_builtin_arms(&builtins_src);
    let core_cats = extract_categorized_names(&parser_src, "fn is_perl5_core", "matches!");
    let ext_cats =
        extract_categorized_names(&parser_src, "fn stryke_extension_name", "match name");
    let mut descriptions = extract_builtin_doc_comments(&builtins_src, &arms);
    let lsp_descs = extract_lsp_descriptions(&lsp_src);

    // Doc-comment descriptions win; LSP descriptions only fill the gaps.
    let documented: HashSet<String> = descriptions.iter().map(|(name, _)| name.clone()).collect();
    descriptions.extend(
        lsp_descs
            .into_iter()
            .filter(|(name, _)| !documented.contains(name)),
    );
    descriptions.sort();
    descriptions.dedup_by(|later, earlier| later.0 == earlier.0);

    // Core names: sorted, one category per name (the first after sorting).
    let mut core_pairs: Vec<(String, String)> = core_cats;
    core_pairs.sort();
    core_pairs.dedup_by(|later, earlier| later.0 == earlier.0);
    let core_names: HashSet<&str> = core_pairs.iter().map(|(name, _)| name.as_str()).collect();

    // Extension names: categorized parser entries first, then every dispatch
    // primary that is still unknown, tagged "uncategorized". Core names are
    // never listed as extensions.
    let mut ext_pairs: Vec<(String, String)> = Vec::new();
    let mut ext_seen: HashSet<&str> = HashSet::new();
    let from_parser = ext_cats
        .iter()
        .map(|(name, cat)| (name.as_str(), cat.as_str()));
    let from_arms = arms
        .iter()
        .filter_map(|arm| arm.first())
        .map(|name| (name.as_str(), "uncategorized"));
    for (name, cat) in from_parser.chain(from_arms) {
        if !core_names.contains(name) && ext_seen.insert(name) {
            ext_pairs.push((name.to_string(), cat.to_string()));
        }
    }
    ext_pairs.sort();

    let category_of: HashMap<&str, &str> = core_pairs
        .iter()
        .chain(&ext_pairs)
        .map(|(name, cat)| (name.as_str(), cat.as_str()))
        .collect();

    // Every primary name, plus each alias inheriting its primary's category.
    let mut all_pairs: Vec<(String, String)> = Vec::new();
    let mut all_seen: HashSet<&str> = HashSet::new();
    for (name, cat) in core_pairs.iter().chain(&ext_pairs) {
        if all_seen.insert(name.as_str()) {
            all_pairs.push((name.clone(), cat.clone()));
        }
    }
    for arm in &arms {
        let Some((primary, aliases)) = arm.split_first() else {
            continue;
        };
        let Some(cat) = category_of.get(primary.as_str()) else {
            continue;
        };
        for alias in aliases {
            if all_seen.insert(alias.as_str()) {
                all_pairs.push((alias.clone(), (*cat).to_owned()));
            }
        }
    }
    all_pairs.sort();

    let mut cat_pairs: Vec<(String, String)> =
        core_pairs.iter().chain(&ext_pairs).cloned().collect();
    cat_pairs.sort();

    let out_dir = env::var("OUT_DIR").expect("OUT_DIR not set");
    let dest = PathBuf::from(out_dir).join("reflection.rs");

    let mut body = String::from("// GENERATED by build.rs — do not edit.\n\n");

    body.push_str("pub(crate) const BUILTIN_ARMS: &[&[&str]] = &[\n");
    for arm in &arms {
        let quoted: Vec<String> = arm.iter().map(|name| format!("{:?}", name)).collect();
        body.push_str(&format!(" &[{}],\n", quoted.join(", ")));
    }
    body.push_str("];\n\n");

    let category_tables: [(&str, &[(String, String)]); 4] = [
        ("CATEGORY_MAP", cat_pairs.as_slice()),
        ("CORE_CATEGORY_MAP", core_pairs.as_slice()),
        ("EXT_CATEGORY_MAP", ext_pairs.as_slice()),
        ("ALL_CATEGORY_MAP", all_pairs.as_slice()),
    ];
    for (ident, rows) in category_tables {
        push_pair_table(&mut body, ident, rows);
        body.push('\n');
    }
    // Last table: no trailing blank line.
    push_pair_table(&mut body, "DESCRIPTIONS", &descriptions);

    fs::write(&dest, body).expect("write reflection.rs");
}
/// Collects the quoted names on the pattern side of every top-level arm of
/// the `match name { ... }` inside `pub(crate) fn try_builtin`.
///
/// Each returned inner vector holds the string literals of one arm, in source
/// order, with any name containing `::` removed; arms left with no names are
/// dropped. String literals and `//` comments are skipped while scanning, and
/// only `{`/`}` nesting is tracked to decide what counts as top level.
///
/// Panics if `try_builtin` or its `match name {` cannot be found, or if the
/// match block is never closed.
fn extract_try_builtin_arms(src: &str) -> Vec<Vec<String>> {
    const MATCH_HEAD: &str = "match name {";

    let fn_at = src
        .find("pub(crate) fn try_builtin")
        .expect("try_builtin not found");
    let head_at = src[fn_at..]
        .find(MATCH_HEAD)
        .expect("`match name {` not found inside try_builtin");
    let open = fn_at + head_at + MATCH_HEAD.len();
    let close = find_matching_rbrace(src.as_bytes(), open);
    let body = &src[open..close];
    let bytes = body.as_bytes();

    let mut arms: Vec<Vec<String>> = Vec::new();
    let mut depth = 0i32;
    // Start of the pattern text of the arm currently being scanned.
    let mut pattern_start = 0usize;
    let mut pos = 0usize;
    while let Some(&byte) = bytes.get(pos) {
        match byte {
            b'"' => {
                pos = skip_string(bytes, pos);
                continue;
            }
            b'/' if bytes.get(pos + 1) == Some(&b'/') => {
                // Stop on the newline (or at the end); the newline itself is
                // consumed by the next iteration.
                pos += bytes[pos..].iter().take_while(|&&b| b != b'\n').count();
                continue;
            }
            b'{' => depth += 1,
            b'}' => depth -= 1,
            b',' if depth == 0 => pattern_start = pos + 1,
            b'=' if depth == 0 && bytes.get(pos + 1) == Some(&b'>') => {
                let mut names = Vec::new();
                extract_quoted(&body[pattern_start..pos], &mut names);
                names.retain(|name| !name.contains("::"));
                if !names.is_empty() {
                    arms.push(names);
                }
                pos += 2;
                continue;
            }
            _ => {}
        }
        pos += 1;
    }
    arms
}
/// Extracts `(name, category)` pairs from a delimited block in `src`.
///
/// The block is the first `{...}` or `(...)` that follows `block_marker`,
/// which is itself searched for after the first occurrence of `fn_marker`.
/// The block is read line by line: a `//` comment line that
/// `parse_section_header` accepts switches the current category, other
/// comment lines are ignored, and every string literal on a remaining line is
/// emitted with the current category. Names seen before any header get
/// `"uncategorized"`.
///
/// Panics if either marker or the opening delimiter is missing, or if the
/// block is never closed.
fn extract_categorized_names(
    src: &str,
    fn_marker: &str,
    block_marker: &str,
) -> Vec<(String, String)> {
    let fn_at = match src.find(fn_marker) {
        Some(at) => at,
        None => panic!("{} not found", fn_marker),
    };
    let marker_at = match src[fn_at..].find(block_marker) {
        Some(rel) => fn_at + rel,
        None => panic!("`{}` not found inside {}", block_marker, fn_marker),
    };

    let bytes = src.as_bytes();
    let scan_from = marker_at + block_marker.len();
    let (open_at, open, close) = bytes[scan_from..]
        .iter()
        .enumerate()
        .find_map(|(offset, &b)| match b {
            b'{' => Some((scan_from + offset, b'{', b'}')),
            b'(' => Some((scan_from + offset, b'(', b')')),
            _ => None,
        })
        .unwrap_or_else(|| panic!("no opening delimiter after {}", block_marker));

    let body_start = open_at + 1;
    let body_end = find_matching_close(bytes, body_start, open, close);

    let mut pairs: Vec<(String, String)> = Vec::new();
    let mut category = String::from("uncategorized");
    for line in src[body_start..body_end].lines() {
        if let Some(comment) = line.trim_start().strip_prefix("//") {
            if let Some(header) = parse_section_header(comment) {
                category = header;
            }
            continue;
        }
        let mut names = Vec::new();
        extract_quoted(line, &mut names);
        pairs.extend(names.into_iter().map(|name| (name, category.clone())));
    }
    pairs
}
/// Interprets the text after `//` as a section header and returns its label.
///
/// A header is any comment containing at least one box-drawing `─`
/// character. The rule characters are blanked out, everything from the first
/// `:` onward is discarded, inner whitespace is collapsed to single spaces,
/// and the result is lower-cased.
///
/// Returns `None` when the comment has no `─`, when nothing is left after
/// cleaning, or when the label is longer than 40 characters. The limit is
/// counted in characters, not bytes, so a short non-ASCII label is not
/// rejected merely because its UTF-8 encoding is longer.
fn parse_section_header(comment_body: &str) -> Option<String> {
    const RULE: char = '─';
    const MAX_LABEL_CHARS: usize = 40;

    if !comment_body.contains(RULE) {
        return None;
    }

    let without_rule = comment_body.replace(RULE, " ");
    // Keep only the part before the first colon ("Strings: misc" -> "Strings").
    let head = without_rule.split(':').next().unwrap_or("");
    let label = head.split_whitespace().collect::<Vec<_>>().join(" ");

    if label.is_empty() || label.chars().count() > MAX_LABEL_CHARS {
        return None;
    }
    Some(label.to_lowercase())
}
/// Extracts `(label, description)` pairs from the `match key { ... }` inside
/// `fn doc_for_label_text`.
///
/// For every top-level arm, the string literals before `=>` are the labels.
/// The description is the first string literal after `=>`, provided it shows
/// up before any comma, reduced with `first_sentence`. Arms whose description
/// is missing or empty contribute nothing. The result is sorted by label and
/// keeps one entry per label.
///
/// Returns an empty vector when the function, the `match key`, or its opening
/// brace cannot be found.
fn extract_lsp_descriptions(src: &str) -> Vec<(String, String)> {
    const MATCH_HEAD: &str = "match key";

    let src_bytes = src.as_bytes();
    let located = src.find("fn doc_for_label_text").and_then(|fn_at| {
        let after_head = fn_at + src[fn_at..].find(MATCH_HEAD)? + MATCH_HEAD.len();
        let brace = src_bytes[after_head..].iter().position(|&b| b == b'{')?;
        Some(after_head + brace + 1)
    });
    let Some(block_start) = located else {
        return Vec::new();
    };
    let block_end = find_matching_rbrace(src_bytes, block_start);
    let body = &src[block_start..block_end];
    let bytes = body.as_bytes();

    let mut out: Vec<(String, String)> = Vec::new();
    let mut depth = 0i32;
    // Start of the pattern text of the arm currently being scanned.
    let mut pattern_start = 0usize;
    let mut pos = 0usize;
    while let Some(&byte) = bytes.get(pos) {
        match byte {
            b'"' => {
                pos = skip_string(bytes, pos);
                continue;
            }
            b'/' if bytes.get(pos + 1) == Some(&b'/') => {
                // Stop on the newline (or at the end of the block).
                pos += bytes[pos..].iter().take_while(|&&b| b != b'\n').count();
                continue;
            }
            b'{' => depth += 1,
            b'}' => depth -= 1,
            b',' if depth == 0 => pattern_start = pos + 1,
            b'=' if depth == 0 && bytes.get(pos + 1) == Some(&b'>') => {
                let mut labels = Vec::new();
                extract_quoted(&body[pattern_start..pos], &mut labels);

                // Look ahead for the arm's value: whichever of `"` or `,`
                // comes first decides whether there is a literal to read.
                let rhs = pos + 2;
                let stop = bytes[rhs..]
                    .iter()
                    .position(|&b| b == b'"' || b == b',')
                    .map(|offset| rhs + offset);
                let summary = match stop {
                    Some(quote) if bytes[quote] == b'"' => {
                        let end = skip_string(bytes, quote);
                        first_sentence(&body[quote + 1..end - 1])
                    }
                    _ => String::new(),
                };
                if !summary.is_empty() {
                    out.extend(labels.into_iter().map(|label| (label, summary.clone())));
                }
                pos += 2;
                continue;
            }
            _ => {}
        }
        pos += 1;
    }

    out.sort_by(|left, right| left.0.cmp(&right.0));
    out.dedup_by(|later, earlier| later.0 == earlier.0);
    out
}
/// Decodes the raw (still escaped) contents of a Rust string literal and
/// returns its first sentence.
///
/// Escapes are decoded the way Rust string literals define them: `\n`, `\t`,
/// `\r`, `\0`, `\\`, `\"`, `\'`, `\xNN` (ASCII only), `\u{...}`, and the
/// line continuation `\` + newline, which drops the newline and all following
/// whitespace. An escape that is unknown or malformed falls back to emitting
/// the character after the backslash unchanged. A trailing lone backslash
/// ends decoding.
///
/// The result is the first line of the first paragraph, cut after the first
/// `". "`, limited to 200 characters (the last one becoming `…` when cut),
/// and trimmed.
fn first_sentence(raw: &str) -> String {
    const MAX: usize = 200;

    /// Parses the digits of a `\xNN` (`kind == 'x'`) or `\u{...}` escape from
    /// `rest`, which is positioned just after the `x`/`u`. Returns `None` for
    /// anything malformed; the caller passes a clone so nothing is consumed
    /// in that case.
    fn hex_escape(kind: char, rest: &mut std::iter::Peekable<std::str::Chars<'_>>) -> Option<char> {
        let hex: String = if kind == 'x' {
            let pair: String = rest.by_ref().take(2).collect();
            if pair.len() != 2 {
                return None;
            }
            pair
        } else {
            if rest.next() != Some('{') {
                return None;
            }
            let mut digits = String::new();
            loop {
                match rest.next()? {
                    '}' => break,
                    '_' => {} // digit separators are allowed inside \u{...}
                    d => digits.push(d),
                }
            }
            digits
        };
        if hex.is_empty() || hex.len() > 6 || !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
            return None;
        }
        let code = u32::from_str_radix(&hex, 16).ok()?;
        if kind == 'x' && code > 0x7F {
            return None;
        }
        char::from_u32(code)
    }

    let mut decoded = String::with_capacity(raw.len());
    let mut chars = raw.chars().peekable();
    while let Some(c) = chars.next() {
        if c != '\\' {
            decoded.push(c);
            continue;
        }
        let Some(escaped) = chars.next() else {
            break;
        };
        match escaped {
            'n' => decoded.push('\n'),
            't' => decoded.push('\t'),
            'r' => decoded.push('\r'),
            '0' => decoded.push('\0'),
            // Line continuation: without this the text would be cut at the
            // continuation by the first-line step below.
            '\n' | '\r' => {
                while matches!(chars.peek().copied(), Some(' ' | '\t' | '\n' | '\r')) {
                    chars.next();
                }
            }
            'x' | 'u' => {
                let mut probe = chars.clone();
                match hex_escape(escaped, &mut probe) {
                    Some(ch) => {
                        decoded.push(ch);
                        chars = probe;
                    }
                    None => decoded.push(escaped),
                }
            }
            // Covers `\\`, `\"`, `\'` and any unknown escape.
            other => decoded.push(other),
        }
    }

    let first_para = decoded.split("\n\n").next().unwrap_or(&decoded);
    let first_line = first_para.lines().next().unwrap_or(first_para).trim();
    let mut sentence = first_line.to_string();
    if let Some(idx) = sentence.find(". ") {
        // '.' is one byte, so idx + 1 is a char boundary.
        sentence.truncate(idx + 1);
    }
    if sentence.chars().count() > MAX {
        let truncated: String = sentence.chars().take(MAX - 1).collect();
        sentence = format!("{}…", truncated);
    }
    sentence.trim().to_string()
}
/// Returns the index of the `}` that closes a block whose `{` has already
/// been consumed, scanning `bytes` from `start`. Convenience wrapper over
/// `find_matching_close` for curly braces.
fn find_matching_rbrace(bytes: &[u8], start: usize) -> usize {
    const OPEN: u8 = b'{';
    const CLOSE: u8 = b'}';
    find_matching_close(bytes, start, OPEN, CLOSE)
}
/// Finds the `close` byte that balances an already-opened delimiter.
///
/// Scanning begins at `start` with a nesting depth of one; each `open` byte
/// deepens the nesting and each `close` byte reduces it. Double-quoted string
/// literals and `//` line comments are skipped, so delimiters inside them do
/// not count. Returns the index of the balancing `close` byte.
///
/// Panics if the end of `bytes` is reached first.
fn find_matching_close(bytes: &[u8], start: usize, open: u8, close: u8) -> usize {
    let mut nesting = 1i32;
    let mut pos = start;
    while let Some(&byte) = bytes.get(pos) {
        if byte == b'"' {
            pos = skip_string(bytes, pos);
            continue;
        }
        if byte == b'/' && bytes.get(pos + 1) == Some(&b'/') {
            // Jump to the end of the comment line (or the end of input).
            pos += bytes[pos..].iter().take_while(|&&b| b != b'\n').count();
            continue;
        }
        if byte == open {
            nesting += 1;
        } else if byte == close {
            nesting -= 1;
            if nesting == 0 {
                return pos;
            }
        }
        pos += 1;
    }
    panic!("unterminated block starting at {}", start);
}
/// Given the index `i` of an opening `"`, returns the index just past the
/// matching closing `"`.
///
/// A backslash skips the byte after it, so escaped quotes do not end the
/// string. If the literal is never closed, the returned index is at (or past)
/// the end of `bytes`.
fn skip_string(bytes: &[u8], i: usize) -> usize {
    let mut cursor = i + 1;
    while let Some(&byte) = bytes.get(cursor) {
        match byte {
            b'\\' => cursor = bytes.len().min(cursor + 2),
            b'"' => return cursor + 1,
            _ => cursor += 1,
        }
    }
    cursor
}
/// Appends the contents of every double-quoted literal in `slice` to `out`.
///
/// The text is taken verbatim between the quotes, so escape sequences stay
/// escaped. A backslash skips the byte after it, letting `\"` sit inside a
/// literal. Empty literals are not recorded, and a literal left open runs to
/// the end of `slice`.
fn extract_quoted(slice: &str, out: &mut Vec<String>) {
    let bytes = slice.as_bytes();
    let len = bytes.len();
    let mut cursor = 0usize;
    while cursor < len {
        if bytes[cursor] != b'"' {
            cursor += 1;
            continue;
        }
        let open = cursor + 1;
        let mut end = open;
        loop {
            match bytes.get(end) {
                None | Some(b'"') => break,
                Some(b'\\') => end = (end + 2).min(len),
                Some(_) => end += 1,
            }
        }
        if end > open {
            out.push(slice[open..end].to_owned());
        }
        cursor = end + 1;
    }
}
/// Builds `(dispatch name, one-sentence description)` pairs from the `///`
/// doc comments of the `builtin_*` functions in `src`.
///
/// A regex links each quoted dispatch name to the `builtin_*` function called
/// on the right of its `=>`; when a name matches more than once the last
/// match wins. For each line starting with `fn builtin_` or `pub fn builtin_`
/// the `///` lines directly above it (blank lines are skipped) are joined
/// with spaces to form that function's doc text. Names whose function has no
/// doc text are left out. The result is sorted. `_arms` is not used.
fn extract_builtin_doc_comments(src: &str, _arms: &[Vec<String>]) -> Vec<(String, String)> {
    use std::collections::HashMap;

    let dispatch_re =
        regex::Regex::new(r#""([a-z_][a-z0-9_:]*)"[^=]*=>[^(]*\((builtin_\w+)\("#).unwrap();
    let name_to_fn: HashMap<String, String> = dispatch_re
        .captures_iter(src)
        .map(|cap| (cap[1].to_string(), cap[2].to_string()))
        .collect();

    let lines: Vec<&str> = src.lines().collect();
    let mut fn_docs: HashMap<&str, String> = HashMap::new();
    for (idx, line) in lines.iter().enumerate() {
        let signature = line.trim();
        let is_builtin_fn =
            signature.starts_with("fn builtin_") || signature.starts_with("pub fn builtin_");
        if !is_builtin_fn {
            continue;
        }
        // The identifier is the last word before the first '('.
        let Some(fn_name) = signature
            .split('(')
            .next()
            .and_then(|head| head.split_whitespace().last())
        else {
            continue;
        };

        // Walk upwards collecting doc lines; the first line that is neither a
        // doc comment nor blank ends the walk.
        let mut collected: Vec<&str> = Vec::new();
        for above in lines[..idx].iter().rev().map(|l| l.trim()) {
            if above.starts_with("///") {
                let text = above
                    .strip_prefix("/// ")
                    .or_else(|| above.strip_prefix("///"))
                    .unwrap_or(above);
                collected.push(text);
            } else if !above.is_empty() {
                break;
            }
        }
        collected.reverse();

        let doc = collected.join(" ").trim().to_string();
        if !doc.is_empty() {
            fn_docs.insert(fn_name, doc);
        }
    }

    // Map keys are unique, so each dispatch name appears at most once.
    let mut out: Vec<(String, String)> = name_to_fn
        .iter()
        .filter_map(|(dispatch, func)| {
            fn_docs
                .get(func.as_str())
                .map(|doc| (dispatch.clone(), first_sentence_build(doc)))
        })
        .collect();
    out.sort();
    out
}
/// Returns `s` cut after the first `". "` (keeping the period), limited to
/// 200 characters; when the limit is exceeded the text is cut to 199
/// characters and `…` is appended. Whitespace is not trimmed.
fn first_sentence_build(s: &str) -> String {
    const LIMIT: usize = 200;

    let head = match s.find(". ") {
        // '.' is a single byte, so the inclusive range ends on a char boundary.
        Some(dot) => &s[..=dot],
        None => s,
    };
    if head.chars().count() <= LIMIT {
        return head.to_string();
    }
    let mut clipped: String = head.chars().take(LIMIT - 1).collect();
    clipped.push('\u{2026}');
    clipped
}