use std::{
ffi::{CStr, CString},
os::raw::c_char,
path::PathBuf,
};
/// Errors reported by this libclang wrapper.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// `clang_sys::load` failed; the payload is the message it returned.
///
/// `Parser::parse` also reuses this variant to report inputs it cannot hand to
/// libclang (for example a path or argument containing an interior NUL byte).
#[error("failed to load libclang: {0}")]
LibClang(String),
/// `clang_createIndex` returned a null handle.
#[error("clang_createIndex returned null")]
NullIndex,
/// `clang_parseTranslationUnit2` did not report success, or produced a null handle.
#[error("clang failed to parse `{}`: error code {code}", path.display())]
Parse {
/// File that was handed to libclang.
path: PathBuf,
/// Raw status code returned by `clang_parseTranslationUnit2`.
code: i32,
},
}
/// Owning wrapper around a libclang `CXIndex`.
///
/// The handle is created in `Index::new` and released when the value is dropped.
pub struct Index {
// Raw libclang index handle; checked to be non-null when constructed via `Index::new`.
handle: clang_sys::CXIndex,
}
// SAFETY: NOTE(review): this asserts that a `CXIndex` may be moved to, used on, and
// dropped on another thread. Nothing in this file establishes that — confirm against
// libclang's threading guarantees. Also verify how `clang_sys::load` / `is_loaded`
// scope the loaded library (presumably per thread when runtime loading is used), since
// an `Index` moved to a thread without a loaded library could not be disposed there.
unsafe impl Send for Index {}
impl Index {
pub fn new() -> Result<Self, Error> {
if !clang_sys::is_loaded() {
clang_sys::load().map_err(Error::LibClang)?;
}
let handle = unsafe { clang_sys::clang_createIndex(0, 0) };
if handle.is_null() {
return Err(Error::NullIndex);
}
Ok(Self { handle })
}
pub fn parser(&self, file: impl Into<PathBuf>) -> Parser<'_> {
Parser {
index: self,
file: file.into(),
arguments: Vec::new(),
}
}
}
/// Releases the underlying libclang index when the wrapper goes out of scope.
impl Drop for Index {
fn drop(&mut self) {
// SAFETY: `handle` came from `clang_createIndex` and was checked to be non-null in
// `Index::new`; `drop` runs at most once, so the handle is disposed exactly once.
unsafe { clang_sys::clang_disposeIndex(self.handle) };
}
}
/// Builder for a single parse request: a source file plus the command-line
/// arguments to pass to libclang.
pub struct Parser<'i> {
// Index the resulting translation unit will be created in.
index: &'i Index,
// Path of the source file to parse.
file: PathBuf,
// Command-line arguments, in the order they were added.
arguments: Vec<String>,
}
impl<'i> Parser<'i> {
pub fn arg(&mut self, arg: impl Into<String>) -> &mut Self {
self.arguments.push(arg.into());
self
}
pub fn args(&mut self, args: impl IntoIterator<Item = impl Into<String>>) -> &mut Self {
for arg in args {
self.arg(arg);
}
self
}
pub fn parse(&self) -> Result<TranslationUnit<'i>, Error> {
let path = CString::new(self.file.to_string_lossy().as_bytes())
.map_err(|err| Error::LibClang(format!("path contains NUL: {err}")))?;
let cstrings = self
.arguments
.iter()
.map(|a| CString::new(a.as_bytes()))
.collect::<Result<Vec<_>, _>>()
.map_err(|err| Error::LibClang(format!("argument contains NUL: {err}")))?;
let argv = cstrings
.iter()
.map(|c| c.as_ptr())
.collect::<Vec<*const c_char>>();
let mut handle = std::ptr::null_mut();
let code = unsafe {
clang_sys::clang_parseTranslationUnit2(
self.index.handle,
path.as_ptr(),
argv.as_ptr(),
argv.len() as i32,
std::ptr::null_mut(),
0,
clang_sys::CXTranslationUnit_None,
&mut handle,
)
};
if code != clang_sys::CXError_Success || handle.is_null() {
return Err(Error::Parse {
path: self.file.clone(),
code,
});
}
Ok(TranslationUnit {
handle,
_marker: std::marker::PhantomData,
})
}
}
/// Owning wrapper around a libclang `CXTranslationUnit`.
///
/// The `'i` lifetime ties the translation unit to the [`Index`] it was parsed with,
/// so it cannot outlive that index.
pub struct TranslationUnit<'i> {
// Raw translation-unit handle; non-null when produced by `Parser::parse`.
handle: clang_sys::CXTranslationUnit,
// Carries the borrow of the parent `Index` without storing a reference.
_marker: std::marker::PhantomData<&'i Index>,
}
impl<'i> TranslationUnit<'i> {
    /// Returns the root cursor of this translation unit as an [`Entity`].
    ///
    /// The entity borrows the translation unit for its whole lifetime.
    pub fn get_entity(&'i self) -> Entity<'i> {
        Entity {
            // SAFETY: `self.handle` is the handle this wrapper owns; it is only
            // disposed in `Drop`, which cannot have run while `self` is borrowed.
            raw: unsafe { clang_sys::clang_getTranslationUnitCursor(self.handle) },
            tu: self,
        }
    }
}
/// Disposes the underlying libclang translation unit when the wrapper is dropped.
impl<'i> Drop for TranslationUnit<'i> {
fn drop(&mut self) {
// SAFETY: `handle` is owned by this wrapper and `drop` runs at most once, so the
// translation unit is disposed exactly once.
unsafe { clang_sys::clang_disposeTranslationUnit(self.handle) };
}
}
/// Cursor kinds this wrapper distinguishes.
///
/// Each named variant corresponds to one `clang_sys::CXCursor_*` constant (see
/// `EntityKind::from_raw`); every other raw kind is preserved in [`EntityKind::Other`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EntityKind {
UnexposedDecl,
StructDecl,
UnionDecl,
ClassDecl,
FunctionDecl,
ParmDecl,
TypedefDecl,
/// Mapped from `CXCursor_CXXMethod`.
Method,
Namespace,
LinkageSpec,
TypeAliasDecl,
/// Mapped from `CXCursor_CXXBaseSpecifier`.
BaseSpecifier,
TranslationUnit,
AnnotateAttr,
/// Any cursor kind without a dedicated variant; holds the raw libclang value.
Other(i32),
}
impl EntityKind {
fn from_raw(raw: clang_sys::CXCursorKind) -> Self {
match raw {
clang_sys::CXCursor_UnexposedDecl => Self::UnexposedDecl,
clang_sys::CXCursor_StructDecl => Self::StructDecl,
clang_sys::CXCursor_UnionDecl => Self::UnionDecl,
clang_sys::CXCursor_ClassDecl => Self::ClassDecl,
clang_sys::CXCursor_FunctionDecl => Self::FunctionDecl,
clang_sys::CXCursor_ParmDecl => Self::ParmDecl,
clang_sys::CXCursor_TypedefDecl => Self::TypedefDecl,
clang_sys::CXCursor_CXXMethod => Self::Method,
clang_sys::CXCursor_Namespace => Self::Namespace,
clang_sys::CXCursor_LinkageSpec => Self::LinkageSpec,
clang_sys::CXCursor_TypeAliasDecl => Self::TypeAliasDecl,
clang_sys::CXCursor_CXXBaseSpecifier => Self::BaseSpecifier,
clang_sys::CXCursor_TranslationUnit => Self::TranslationUnit,
clang_sys::CXCursor_AnnotateAttr => Self::AnnotateAttr,
_ => Self::Other(raw),
}
}
}
/// Type kinds this wrapper distinguishes.
///
/// Each named variant corresponds to one `clang_sys::CXType_*` constant (see
/// `TypeKind::from_raw`); every other raw kind is preserved in [`TypeKind::Other`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TypeKind {
Unexposed,
Void,
Bool,
/// Mapped from `CXType_Char_U`.
CharU,
UChar,
UShort,
UInt,
ULong,
ULongLong,
/// Mapped from `CXType_Char_S`.
CharS,
SChar,
WChar,
Short,
Int,
Long,
LongLong,
Float,
Double,
Pointer,
LValueReference,
RValueReference,
Enum,
/// Any type kind without a dedicated variant; holds the raw libclang value.
Other(i32),
}
impl TypeKind {
fn from_raw(raw: clang_sys::CXTypeKind) -> Self {
match raw {
clang_sys::CXType_Unexposed => Self::Unexposed,
clang_sys::CXType_Void => Self::Void,
clang_sys::CXType_Bool => Self::Bool,
clang_sys::CXType_Char_U => Self::CharU,
clang_sys::CXType_UChar => Self::UChar,
clang_sys::CXType_UShort => Self::UShort,
clang_sys::CXType_UInt => Self::UInt,
clang_sys::CXType_ULong => Self::ULong,
clang_sys::CXType_ULongLong => Self::ULongLong,
clang_sys::CXType_Char_S => Self::CharS,
clang_sys::CXType_SChar => Self::SChar,
clang_sys::CXType_WChar => Self::WChar,
clang_sys::CXType_Short => Self::Short,
clang_sys::CXType_Int => Self::Int,
clang_sys::CXType_Long => Self::Long,
clang_sys::CXType_LongLong => Self::LongLong,
clang_sys::CXType_Float => Self::Float,
clang_sys::CXType_Double => Self::Double,
clang_sys::CXType_Pointer => Self::Pointer,
clang_sys::CXType_LValueReference => Self::LValueReference,
clang_sys::CXType_RValueReference => Self::RValueReference,
clang_sys::CXType_Enum => Self::Enum,
_ => Self::Other(raw),
}
}
}
/// Copies a libclang `CXString` into an owned `String` and disposes the `CXString`.
///
/// Returns `None` when libclang yields a null C string or when the text is empty.
/// Invalid UTF-8 is replaced lossily.
///
/// # Safety
///
/// `s` must be a `CXString` obtained from libclang that has not been disposed yet.
/// This function disposes it on every path, so the caller must not use or dispose
/// `s` afterwards.
unsafe fn cx_string_to_owned(s: clang_sys::CXString) -> Option<String> {
    let ptr = unsafe { clang_sys::clang_getCString(s) };

    // Copy the text out before disposing: the pointer is backed by the `CXString`.
    let text = if ptr.is_null() {
        None
    } else {
        let borrowed = unsafe { CStr::from_ptr(ptr) };
        Some(borrowed.to_string_lossy().into_owned())
    };

    unsafe { clang_sys::clang_disposeString(s) };

    text.filter(|value| !value.is_empty())
}
/// A cursor into a parsed translation unit.
///
/// Cheap to copy; it borrows the [`TranslationUnit`] it came from.
#[derive(Clone, Copy)]
pub struct Entity<'tu> {
// Raw libclang cursor value.
raw: clang_sys::CXCursor,
// Translation unit this cursor belongs to; the borrow keeps it alive.
tu: &'tu TranslationUnit<'tu>,
}
impl<'tu> Entity<'tu> {
    /// Returns the kind of this cursor.
    pub fn get_kind(&self) -> EntityKind {
        // SAFETY: `self.raw` was produced by libclang and the translation unit it
        // belongs to is kept alive by the `self.tu` borrow.
        EntityKind::from_raw(unsafe { clang_sys::clang_getCursorKind(self.raw) })
    }

    /// Returns the cursor spelling, or `None` when it is missing or empty.
    pub fn get_name(&self) -> Option<String> {
        // SAFETY: see `get_kind`; the returned `CXString` is handed straight to
        // `cx_string_to_owned`, which disposes it.
        let spelling = unsafe { clang_sys::clang_getCursorSpelling(self.raw) };
        unsafe { cx_string_to_owned(spelling) }
    }

    /// Returns the type of this cursor, or `None` when libclang reports `CXType_Invalid`.
    pub fn get_type(&self) -> Option<Type<'tu>> {
        // SAFETY: see `get_kind`.
        let raw = unsafe { clang_sys::clang_getCursorType(self.raw) };
        if raw.kind == clang_sys::CXType_Invalid {
            None
        } else {
            Some(Type { raw, tu: self.tu })
        }
    }

    /// Returns the result type of this cursor, or `None` when libclang reports
    /// `CXType_Invalid`.
    pub fn get_result_type(&self) -> Option<Type<'tu>> {
        // SAFETY: see `get_kind`.
        let raw = unsafe { clang_sys::clang_getCursorResultType(self.raw) };
        if raw.kind == clang_sys::CXType_Invalid {
            None
        } else {
            Some(Type { raw, tu: self.tu })
        }
    }

    /// Returns `true` when libclang flags this cursor as an invalid declaration.
    pub fn is_invalid_declaration(&self) -> bool {
        // SAFETY: see `get_kind`.
        let flag = unsafe { clang_sys::clang_isInvalidDeclaration(self.raw) };
        flag != 0
    }

    /// Returns the direct children of this cursor, in visitation order.
    pub fn get_children(&self) -> Vec<Self> {
        // Visitor callback: records each child and asks libclang to continue with the
        // next sibling rather than recursing.
        extern "C" fn visit(
            child: clang_sys::CXCursor,
            _parent: clang_sys::CXCursor,
            payload: clang_sys::CXClientData,
        ) -> clang_sys::CXChildVisitResult {
            // SAFETY: `payload` is the pointer to the `Vec` created below, which
            // stays alive and otherwise untouched for the whole visit.
            let sink = unsafe { &mut *payload.cast::<Vec<clang_sys::CXCursor>>() };
            sink.push(child);
            clang_sys::CXChildVisit_Continue
        }

        let mut cursors: Vec<clang_sys::CXCursor> = Vec::new();
        let payload = &mut cursors as *mut Vec<clang_sys::CXCursor> as clang_sys::CXClientData;
        // SAFETY: see `get_kind`; `payload` points at `cursors`, which outlives the call.
        unsafe {
            clang_sys::clang_visitChildren(self.raw, visit, payload);
        }

        let tu = self.tu;
        let mut children = Vec::with_capacity(cursors.len());
        for raw in cursors {
            children.push(Self { raw, tu });
        }
        children
    }
}
/// A type obtained from a cursor or from another type.
///
/// Cheap to copy; it borrows the [`TranslationUnit`] it came from.
#[derive(Clone, Copy)]
pub struct Type<'tu> {
// Raw libclang type value.
raw: clang_sys::CXType,
// Translation unit this type belongs to; the borrow keeps it alive.
tu: &'tu TranslationUnit<'tu>,
}
impl<'tu> Type<'tu> {
    /// Returns the kind of this type.
    pub fn get_kind(&self) -> TypeKind {
        let code = self.raw.kind;
        TypeKind::from_raw(code)
    }

    /// Returns the type's spelling, or an empty string when libclang provides none.
    pub fn get_display_name(&self) -> String {
        // SAFETY: `self.raw` was produced by libclang and its translation unit is kept
        // alive by `self.tu`; the `CXString` is disposed by `cx_string_to_owned`.
        let spelling = unsafe { clang_sys::clang_getTypeSpelling(self.raw) };
        match unsafe { cx_string_to_owned(spelling) } {
            Some(name) => name,
            None => String::new(),
        }
    }

    /// Returns the size libclang reports for this type.
    ///
    /// Any non-positive result (zero included) is mapped to `None`.
    pub fn get_sizeof(&self) -> Option<u64> {
        // SAFETY: see `get_display_name`.
        let bytes = unsafe { clang_sys::clang_Type_getSizeOf(self.raw) };
        if bytes > 0 {
            Some(bytes as u64)
        } else {
            None
        }
    }

    /// Returns the canonical form of this type.
    pub fn get_canonical_type(&self) -> Self {
        // SAFETY: see `get_display_name`.
        let canonical = unsafe { clang_sys::clang_getCanonicalType(self.raw) };
        Self {
            raw: canonical,
            tu: self.tu,
        }
    }

    /// Returns the pointee type, or `None` when libclang reports `CXType_Invalid`.
    pub fn get_pointee_type(&self) -> Option<Self> {
        // SAFETY: see `get_display_name`.
        let pointee = unsafe { clang_sys::clang_getPointeeType(self.raw) };
        if pointee.kind == clang_sys::CXType_Invalid {
            None
        } else {
            Some(Self {
                raw: pointee,
                tu: self.tu,
            })
        }
    }
}
// Unit tests for the wrapper. Tests that need a working libclang print a "skipping"
// message and return early when `Index::new` fails, instead of failing.
#[cfg(test)]
mod tests {
use super::*;
// Creating and dropping an index must not crash; skipped when libclang is unavailable.
#[test]
fn index_creates_and_drops() {
match Index::new() {
Ok(index) => drop(index),
Err(err) => eprintln!("skipping: libclang not available: {err}"),
}
}
// Raw cursor kinds with a dedicated variant map to that variant.
#[test]
fn entity_kind_round_trip_known() {
assert_eq!(
EntityKind::from_raw(clang_sys::CXCursor_FunctionDecl),
EntityKind::FunctionDecl,
);
assert_eq!(
EntityKind::from_raw(clang_sys::CXCursor_CXXMethod),
EntityKind::Method,
);
assert_eq!(
EntityKind::from_raw(clang_sys::CXCursor_TranslationUnit),
EntityKind::TranslationUnit,
);
assert_eq!(
EntityKind::from_raw(clang_sys::CXCursor_AnnotateAttr),
EntityKind::AnnotateAttr,
);
}
// Raw cursor kinds without a dedicated variant are preserved inside `Other`.
#[test]
fn entity_kind_unknown_folds_to_other() {
assert_eq!(
EntityKind::from_raw(clang_sys::CXCursor_BinaryOperator),
EntityKind::Other(clang_sys::CXCursor_BinaryOperator),
);
assert_eq!(
EntityKind::from_raw(clang_sys::CXCursor_FunctionTemplate),
EntityKind::Other(clang_sys::CXCursor_FunctionTemplate),
);
assert_eq!(
EntityKind::from_raw(clang_sys::CXCursor_ClassTemplate),
EntityKind::Other(clang_sys::CXCursor_ClassTemplate),
);
}
// `CXCursor_UnexposedDecl` has its own variant and must not fall into `Other`.
#[test]
fn entity_kind_unexposed_decl_maps_to_unexposed_decl() {
assert_eq!(
EntityKind::from_raw(clang_sys::CXCursor_UnexposedDecl),
EntityKind::UnexposedDecl,
);
}
// Raw type kinds with a dedicated variant map to that variant.
#[test]
fn type_kind_round_trip_known() {
assert_eq!(TypeKind::from_raw(clang_sys::CXType_Void), TypeKind::Void);
assert_eq!(TypeKind::from_raw(clang_sys::CXType_Bool), TypeKind::Bool);
assert_eq!(
TypeKind::from_raw(clang_sys::CXType_Pointer),
TypeKind::Pointer
);
assert_eq!(
TypeKind::from_raw(clang_sys::CXType_LValueReference),
TypeKind::LValueReference,
);
assert_eq!(TypeKind::from_raw(clang_sys::CXType_Enum), TypeKind::Enum);
}
// Raw type kinds without a dedicated variant are preserved inside `Other`.
#[test]
fn type_kind_unknown_folds_to_other() {
assert_eq!(
TypeKind::from_raw(clang_sys::CXType_Vector),
TypeKind::Other(clang_sys::CXType_Vector),
);
}
// `CXType_Unexposed` has its own variant and must not fall into `Other`.
#[test]
fn type_kind_unexposed_maps_to_unexposed() {
assert_eq!(
TypeKind::from_raw(clang_sys::CXType_Unexposed),
TypeKind::Unexposed,
);
}
// End-to-end: parse a one-line C++ file and inspect the function declaration.
#[test]
fn parses_a_function_declaration() {
let index = match Index::new() {
Ok(index) => index,
Err(err) => {
eprintln!("skipping: libclang not available: {err}");
return;
}
};
// The source is written to a temporary directory so libclang reads a real file.
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("input.cpp");
std::fs::write(&path, b"int my_function(int x);\n").expect("write");
let mut parser = index.parser(&path);
parser.args(["-x", "c++"]);
let tu = parser.parse().expect("parse");
let root = tu.get_entity();
assert_eq!(root.get_kind(), EntityKind::TranslationUnit);
let children = root.get_children();
let function = children
.iter()
.find(|entity| entity.get_kind() == EntityKind::FunctionDecl)
.expect("FunctionDecl");
assert_eq!(function.get_name().as_deref(), Some("my_function"));
// The declaration has exactly one parameter, `int x`.
let params = function
.get_children()
.into_iter()
.filter(|entity| entity.get_kind() == EntityKind::ParmDecl)
.count();
assert_eq!(params, 1);
}
// End-to-end: check the type accessors on a pointer return type and a pointer parameter.
#[test]
fn type_accessors_walk_a_pointer() {
let index = match Index::new() {
Ok(index) => index,
Err(err) => {
eprintln!("skipping: libclang not available: {err}");
return;
}
};
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("input.cpp");
std::fs::write(&path, b"int* takes_pointer(const int* x);\n").expect("write");
let mut parser = index.parser(&path);
parser.args(["-x", "c++"]);
let tu = parser.parse().expect("parse");
let function = tu
.get_entity()
.get_children()
.into_iter()
.find(|c| c.get_kind() == EntityKind::FunctionDecl)
.expect("FunctionDecl");
// Return type `int*`: a pointer whose canonical pointee is `int`.
let return_ty = function.get_result_type().expect("result type");
assert_eq!(return_ty.get_kind(), TypeKind::Pointer);
let pointee = return_ty.get_pointee_type().expect("pointee");
assert_eq!(pointee.get_canonical_type().get_kind(), TypeKind::Int);
// Parameter type `const int*`: a pointer whose spelling keeps the `const int` part.
let parm = function
.get_children()
.into_iter()
.find(|c| c.get_kind() == EntityKind::ParmDecl)
.expect("ParmDecl");
let parm_ty = parm.get_type().expect("parm type");
assert_eq!(parm_ty.get_kind(), TypeKind::Pointer);
assert!(parm_ty.get_display_name().contains("const int"));
}
}