use std::sync::OnceLock;
use regex::Regex;
/// One `using <Name> = ...` alias found by [`scan`].
#[derive(Debug, Clone)]
pub struct UsingAlias {
    /// The alias identifier (the text between `using` and `=`).
    pub name: String,
    /// Byte offset of the first byte of the identifier in the scanned text.
    pub name_start_byte: usize,
    /// Byte offset one past the last byte of the identifier (exclusive end).
    pub name_end_byte: usize,
}
/// Returns the shared, lazily compiled pattern for `using <Name> =` declarations.
///
/// Capture group 1 is the alias identifier. The pattern is compiled on the
/// first call and the same instance is handed out afterwards.
fn re() -> &'static Regex {
    static ALIAS_RE: OnceLock<Regex> = OnceLock::new();
    ALIAS_RE.get_or_init(|| {
        let compiled = Regex::new(r"\busing\s+([A-Za-z_][A-Za-z0-9_]*)\s*=");
        // The pattern is a fixed literal, so a failure here is a programming error.
        compiled.unwrap()
    })
}
/// Blanks out `#` line comments while keeping every byte offset stable.
///
/// Each comment, from its `#` up to (but not including) the terminating
/// newline, is overwritten with ASCII spaces. The result therefore has the
/// same byte length as `src`, and offsets found in it can be used to index
/// `src` directly.
///
/// A `#` that appears inside a double-quoted string literal (for example in an
/// import path) is not a comment and is left untouched. Backslash escapes
/// inside a literal are honoured so `\"` does not end it. String state is
/// reset at every newline, so an unterminated quote can hide comments only on
/// its own line.
pub(crate) fn strip_comments(src: &str) -> String {
    let mut out = src.as_bytes().to_vec();
    let mut in_string = false;
    let mut i = 0;
    while i < out.len() {
        match out[i] {
            b'\n' => {
                // Never let a stray quote swallow comments on later lines.
                in_string = false;
                i += 1;
            }
            b'\\' if in_string => {
                // Skip the escaped byte so `\"` does not close the literal.
                // A newline is left in place for the arm above to handle.
                i += 1;
                if i < out.len() && out[i] != b'\n' {
                    i += 1;
                }
            }
            b'"' => {
                in_string = !in_string;
                i += 1;
            }
            b'#' if !in_string => {
                while i < out.len() && out[i] != b'\n' {
                    out[i] = b' ';
                    i += 1;
                }
            }
            _ => i += 1,
        }
    }
    // Only whole runs starting at an ASCII `#` and ending before an ASCII `\n`
    // are overwritten, so multi-byte characters are never split and the buffer
    // remains valid UTF-8.
    String::from_utf8(out).unwrap_or_default()
}
/// Collects every `using <Name> =` alias declared in `src`.
///
/// Comments are blanked first via `strip_comments`, so aliases that only
/// appear inside `#` comments are not reported. The recorded byte offsets are
/// positions in that comment-blanked copy.
pub fn scan(src: &str) -> Vec<UsingAlias> {
    let cleaned = strip_comments(src);
    let mut aliases = Vec::new();
    for caps in re().captures_iter(&cleaned) {
        // Group 1 is the alias identifier.
        let Some(ident) = caps.get(1) else { continue };
        aliases.push(UsingAlias {
            name: ident.as_str().to_string(),
            name_start_byte: ident.start(),
            name_end_byte: ident.end(),
        });
    }
    aliases
}
/// Looks up an alias by exact name.
///
/// Returns the first entry in `aliases` whose `name` equals `name`, or `None`
/// when there is no such entry.
pub fn find<'a>(aliases: &'a [UsingAlias], name: &str) -> Option<&'a UsingAlias> {
    for alias in aliases {
        if alias.name == name {
            return Some(alias);
        }
    }
    None
}
/// A declaration header found by [`scan_top_level`].
#[derive(Debug, Clone)]
pub struct TopLevelDecl {
    /// Which keyword introduced the declaration.
    pub kind: DeclKind,
    /// The identifier that follows the keyword.
    pub name: String,
    /// Text of the `#` comment lines directly above the declaration, joined
    /// with `\n`; `None` when there are no such lines.
    pub doc_comment: Option<String>,
}
/// The keyword that introduces a declaration recognised by [`scan_top_level`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DeclKind {
    /// Introduced by `struct`.
    Struct,
    /// Introduced by `enum`.
    Enum,
    /// Introduced by `interface`.
    Interface,
    /// Introduced by `annotation`.
    Annotation,
    /// Introduced by `const`.
    Const,
    /// Introduced by `using`.
    Using,
}
pub fn scan_top_level(src: &str) -> Vec<TopLevelDecl> {
use std::sync::OnceLock;
static R: OnceLock<Regex> = OnceLock::new();
let re = R.get_or_init(|| {
Regex::new(r"^\s*(struct|enum|interface|annotation|const|using)\s+([A-Za-z_][A-Za-z0-9_]*)")
.unwrap()
});
let lines: Vec<&str> = src.lines().collect();
let mut out = Vec::new();
for (i, line) in lines.iter().enumerate() {
let Some(c) = re.captures(line) else { continue };
let kind = match &c[1] {
"struct" => DeclKind::Struct,
"enum" => DeclKind::Enum,
"interface" => DeclKind::Interface,
"annotation" => DeclKind::Annotation,
"const" => DeclKind::Const,
"using" => DeclKind::Using,
_ => continue,
};
let name = c[2].to_string();
let mut doc_lines: Vec<&str> = Vec::new();
let mut j = i;
while j > 0 {
j -= 1;
let prev = lines[j].trim_start();
if let Some(rest) = prev.strip_prefix('#') {
doc_lines.push(rest.trim_start_matches(' '));
} else {
break;
}
}
doc_lines.reverse();
let doc = (!doc_lines.is_empty()).then(|| doc_lines.join("\n"));
out.push(TopLevelDecl {
kind,
name,
doc_comment: doc,
});
}
out
}
pub fn import_path_for(src: &str, name: &str) -> Option<String> {
use std::sync::OnceLock;
static R: OnceLock<Regex> = OnceLock::new();
let re = R.get_or_init(|| {
Regex::new(
r#"\busing\s+([A-Za-z_][A-Za-z0-9_]*)\s*=\s*import\s+"([^"]+)"\s*;"#,
)
.unwrap()
});
let cleaned = strip_comments(src);
for c in re.captures_iter(&cleaned) {
if &c[1] == name {
return Some(c[2].to_string());
}
}
None
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two plain aliases are found, and the recorded offsets slice the
    /// original source back to the alias name.
    #[test]
    fn scans_basic() {
        let src = "using UUID = Data;\nusing UTCSecondsSinceEpoch = UInt64;\n";
        let found = scan(src);
        assert_eq!(found.len(), 2);
        assert_eq!(found[0].name, "UUID");
        let second = &found[1];
        assert_eq!(
            &src[second.name_start_byte..second.name_end_byte],
            "UTCSecondsSinceEpoch"
        );
    }

    /// An alias that only appears inside a `#` comment is not reported.
    #[test]
    fn ignores_comments() {
        let src = "# using Foo = Bar;\nusing Real = Text;\n";
        let found = scan(src);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].name, "Real");
    }
}