com_scrape/
clang.rs

1use std::any::Any;
2use std::error::Error;
3use std::ffi::{c_char, c_int, c_longlong, c_uint, c_ulong, c_ulonglong, c_void, CStr, CString};
4use std::fmt::Display;
5use std::marker::PhantomData;
6use std::mem::MaybeUninit;
7use std::ops::Deref;
8use std::panic::{catch_unwind, resume_unwind, AssertUnwindSafe};
9use std::path::PathBuf;
10use std::ptr::NonNull;
11use std::{fmt, ptr, slice};
12
13use clang_sys::support::Clang;
14use clang_sys::*;
15
/// Expands a string literal into a `*const c_char` pointing at a NUL-terminated copy of it.
///
/// The terminator is appended at compile time with `concat!`, so the resulting pointer refers
/// to static data. The literal itself should not contain an interior NUL byte.
macro_rules! c_str {
    ($str:literal) => {
        concat!($str, "\0").as_ptr().cast::<c_char>()
    };
}
21
22pub struct TranslationUnit {
23    index: CXIndex,
24    unit: CXTranslationUnit,
25}
26
27impl TranslationUnit {
28    pub fn new(
29        source: &str,
30        include_paths: &[PathBuf],
31        target: Option<&str>,
32    ) -> Result<TranslationUnit, Box<dyn Error>> {
33        let mut args = vec![
34            "-x".to_string(),
35            "c++".to_string(),
36            "-std=c++14".to_string(),
37        ];
38
39        if let Some(target) = target {
40            args.push("-target".to_string());
41            args.push(target.to_string());
42        }
43
44        if let Some(clang) = Clang::find(None, &args) {
45            if let Some(paths) = clang.cpp_search_paths {
46                for path in paths {
47                    args.push("-isystem".to_string());
48                    args.push(path.to_str().unwrap().to_string());
49                }
50            }
51        }
52
53        for include_path in include_paths {
54            args.push("-I".to_string());
55            args.push(include_path.to_str().unwrap().to_string());
56        }
57
58        let args_cstrs = args
59            .iter()
60            .map(|s| CString::new(&**s).unwrap())
61            .collect::<Vec<_>>();
62        let args_ptrs = args_cstrs.iter().map(|s| s.as_ptr()).collect::<Vec<_>>();
63
64        unsafe {
65            let index = clang_createIndex(0, 0);
66
67            let filename = c_str!("header.h");
68            let mut sources = [CXUnsavedFile {
69                Filename: filename,
70                Contents: source.as_ptr() as *const c_char,
71                Length: source.len() as c_ulong,
72            }];
73
74            let mut unit = MaybeUninit::uninit();
75            let result = clang_parseTranslationUnit2(
76                index,
77                filename,
78                args_ptrs.as_ptr(),
79                args_ptrs.len() as c_int,
80                sources.as_mut_ptr(),
81                sources.len() as u32,
82                CXTranslationUnit_None,
83                unit.as_mut_ptr(),
84            );
85            let unit = unit.assume_init();
86
87            if result != CXError_Success {
88                clang_disposeIndex(index);
89                return Err("error building translation unit".into());
90            }
91
92            let mut has_error = false;
93            let mut error = String::new();
94            let num_diagnostics = clang_getNumDiagnostics(unit) as usize;
95            for i in 0..num_diagnostics {
96                let diagnostic = clang_getDiagnostic(unit, i as c_uint);
97                if clang_getDiagnosticSeverity(diagnostic) >= CXDiagnostic_Error {
98                    has_error = true;
99
100                    let opts = clang_defaultDiagnosticDisplayOptions();
101                    let str = StringRef::from_raw(clang_formatDiagnostic(diagnostic, opts));
102                    error.push_str(str.to_str().unwrap());
103                    error.push('\n');
104                }
105            }
106            if has_error {
107                clang_disposeIndex(index);
108                return Err(error.into());
109            }
110
111            Ok(TranslationUnit { index, unit })
112        }
113    }
114
115    pub fn cursor(&self) -> Cursor {
116        unsafe { Cursor::from_raw(clang_getTranslationUnitCursor(self.unit)) }
117    }
118}
119
120impl Drop for TranslationUnit {
121    fn drop(&mut self) {
122        unsafe {
123            clang_disposeTranslationUnit(self.unit);
124            clang_disposeIndex(self.index);
125        }
126    }
127}
128
/// The subset of libclang cursor kinds this module distinguishes.
///
/// Every libclang kind without a dedicated variant is reported as [`CursorKind::Other`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CursorKind {
    /// `CXCursor_Namespace`
    Namespace,
    /// `CXCursor_TypedefDecl`
    TypedefDecl,
    /// `CXCursor_TypeAliasDecl`
    TypeAliasDecl,
    /// `CXCursor_EnumDecl`
    EnumDecl,
    /// `CXCursor_EnumConstantDecl`
    EnumConstantDecl,
    /// `CXCursor_VarDecl`
    VarDecl,
    /// `CXCursor_StructDecl`
    StructDecl,
    /// `CXCursor_UnionDecl`
    UnionDecl,
    /// `CXCursor_ClassDecl`
    ClassDecl,
    /// `CXCursor_FieldDecl`
    FieldDecl,
    /// `CXCursor_CXXMethod`
    CxxMethod,
    /// `CXCursor_CXXBaseSpecifier`
    CxxBaseSpecifier,
    /// Any cursor kind not listed above.
    Other,
}
145
146pub struct Cursor<'a> {
147    cursor: CXCursor,
148    _marker: PhantomData<&'a ()>,
149}
150
151impl<'a> Cursor<'a> {
152    unsafe fn from_raw(cursor: CXCursor) -> Cursor<'a> {
153        Cursor {
154            cursor,
155            _marker: PhantomData,
156        }
157    }
158
159    pub fn kind(&self) -> CursorKind {
160        #[allow(non_upper_case_globals)]
161        match unsafe { clang_getCursorKind(self.cursor) } {
162            CXCursor_Namespace => CursorKind::Namespace,
163            CXCursor_TypedefDecl => CursorKind::TypedefDecl,
164            CXCursor_TypeAliasDecl => CursorKind::TypeAliasDecl,
165            CXCursor_EnumDecl => CursorKind::EnumDecl,
166            CXCursor_EnumConstantDecl => CursorKind::EnumConstantDecl,
167            CXCursor_VarDecl => CursorKind::VarDecl,
168            CXCursor_StructDecl => CursorKind::StructDecl,
169            CXCursor_UnionDecl => CursorKind::UnionDecl,
170            CXCursor_ClassDecl => CursorKind::ClassDecl,
171            CXCursor_FieldDecl => CursorKind::FieldDecl,
172            CXCursor_CXXMethod => CursorKind::CxxMethod,
173            CXCursor_CXXBaseSpecifier => CursorKind::CxxBaseSpecifier,
174            _ => CursorKind::Other,
175        }
176    }
177
178    pub fn name(&self) -> StringRef<'a> {
179        unsafe { StringRef::from_raw(clang_getCursorSpelling(self.cursor)) }
180    }
181
182    pub fn is_anonymous(&self) -> bool {
183        unsafe { clang_Cursor_isAnonymous(self.cursor) != 0 }
184    }
185
186    pub fn location(&self) -> Location<'a> {
187        unsafe { Location::from_raw(clang_getCursorLocation(self.cursor)) }
188    }
189
190    pub fn is_in_system_header(&self) -> bool {
191        unsafe {
192            let location = clang_getCursorLocation(self.cursor);
193            clang_Location_isInSystemHeader(location) != 0
194        }
195    }
196
197    pub fn is_definition(&self) -> bool {
198        unsafe { clang_equalCursors(self.cursor, clang_getCursorDefinition(self.cursor)) != 0 }
199    }
200
201    pub fn type_(&self) -> Option<Type<'a>> {
202        let type_ = unsafe { clang_getCursorType(self.cursor) };
203        if type_.kind == CXType_Invalid {
204            None
205        } else {
206            Some(unsafe { Type::from_raw(type_) })
207        }
208    }
209
210    pub fn typedef_underlying_type(&self) -> Option<Type<'a>> {
211        let type_ = unsafe { clang_getTypedefDeclUnderlyingType(self.cursor) };
212        if type_.kind == CXType_Invalid {
213            None
214        } else {
215            Some(unsafe { Type::from_raw(type_) })
216        }
217    }
218
219    pub fn enum_integer_type(&self) -> Option<Type<'a>> {
220        let type_ = unsafe { clang_getEnumDeclIntegerType(self.cursor) };
221        if type_.kind == CXType_Invalid {
222            None
223        } else {
224            Some(unsafe { Type::from_raw(type_) })
225        }
226    }
227
228    pub fn enum_constant_value(&self) -> Option<c_longlong> {
229        if self.kind() == CursorKind::EnumConstantDecl {
230            unsafe { Some(clang_getEnumConstantDeclValue(self.cursor)) }
231        } else {
232            None
233        }
234    }
235
236    pub fn enum_constant_value_unsigned(&self) -> Option<c_ulonglong> {
237        if self.kind() == CursorKind::EnumConstantDecl {
238            unsafe { Some(clang_getEnumConstantDeclUnsignedValue(self.cursor)) }
239        } else {
240            None
241        }
242    }
243
244    pub fn num_arguments(&self) -> Option<usize> {
245        let num_arguments = unsafe { clang_Cursor_getNumArguments(self.cursor) };
246
247        if num_arguments == -1 {
248            None
249        } else {
250            Some(num_arguments as usize)
251        }
252    }
253
254    pub fn argument(&self, index: usize) -> Option<Cursor<'a>> {
255        unsafe {
256            let argument = clang_Cursor_getArgument(self.cursor, index as c_uint);
257
258            if clang_Cursor_isNull(argument) != 0 {
259                None
260            } else {
261                Some(Cursor::from_raw(argument))
262            }
263        }
264    }
265
266    pub fn result_type(&self) -> Option<Type<'a>> {
267        let result_type = unsafe { clang_getCursorResultType(self.cursor) };
268        if result_type.kind == CXType_Invalid {
269            None
270        } else {
271            Some(unsafe { Type::from_raw(result_type) })
272        }
273    }
274
275    pub fn is_virtual(&self) -> bool {
276        unsafe { clang_CXXMethod_isVirtual(self.cursor) != 0 }
277    }
278
279    pub fn evaluate(&self) -> EvalResult<'a> {
280        unsafe { EvalResult::from_raw(clang_Cursor_Evaluate(self.cursor)) }
281    }
282
283    pub fn tokens(&self) -> Tokens<'a> {
284        unsafe {
285            let unit = clang_Cursor_getTranslationUnit(self.cursor);
286
287            let extent = clang_getCursorExtent(self.cursor);
288            let start = Location::from_raw(clang_getRangeStart(extent)).file_location();
289            let end = Location::from_raw(clang_getRangeEnd(extent)).file_location();
290
291            let physical_start = clang_getLocationForOffset(unit, start.file, start.offset);
292            let physical_end = clang_getLocationForOffset(unit, end.file, end.offset);
293            let physical_extent = clang_getRange(physical_start, physical_end);
294
295            let mut ptr = NonNull::dangling().as_ptr();
296            let mut len = 0;
297            clang_tokenize(unit, physical_extent, &mut ptr, &mut len);
298
299            Tokens::from_raw(unit, ptr, len as usize)
300        }
301    }
302
303    pub fn visit_children<F, E>(&self, mut callback: F) -> Result<(), E>
304    where
305        F: FnMut(&Cursor) -> Result<(), E>,
306    {
307        extern "C" fn visitor<E>(
308            cursor: CXCursor,
309            _parent: CXCursor,
310            client_data: CXClientData,
311        ) -> CXChildVisitResult {
312            let data_ptr = client_data as *mut Data<E>;
313
314            // If a re-entrant call to visit_children panicked, continue unwinding
315            let data = unsafe { &*data_ptr };
316            if data.panic.is_some() {
317                return CXChildVisit_Break;
318            }
319
320            let result = catch_unwind(AssertUnwindSafe(|| unsafe {
321                let data = &mut *data_ptr;
322                (data.callback)(&Cursor::from_raw(cursor))
323            }));
324
325            match result {
326                Ok(res) => match res {
327                    Ok(()) => CXChildVisit_Continue,
328                    Err(err) => {
329                        let data = unsafe { &mut *data_ptr };
330                        data.result = Err(err);
331                        CXChildVisit_Break
332                    }
333                },
334                Err(panic) => {
335                    let data = unsafe { &mut *data_ptr };
336                    data.panic = Some(panic);
337
338                    CXChildVisit_Break
339                }
340            }
341        }
342
343        struct Data<'c, E> {
344            callback: &'c mut dyn FnMut(&Cursor) -> Result<(), E>,
345            result: Result<(), E>,
346            panic: Option<Box<dyn Any + Send + 'static>>,
347        }
348        let mut data = Data {
349            callback: &mut callback,
350            result: Ok(()),
351            panic: None,
352        };
353
354        unsafe {
355            clang_visitChildren(
356                self.cursor,
357                visitor::<E>,
358                &mut data as *mut Data<E> as *mut c_void,
359            );
360        }
361
362        if let Some(panic) = data.panic {
363            resume_unwind(panic);
364        }
365
366        data.result
367    }
368}
369
/// The subset of libclang type kinds this module distinguishes.
///
/// Variant names follow the corresponding `CXType_*` constants; every kind without a dedicated
/// variant is reported as [`TypeKind::Other`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TypeKind {
    /// `CXType_Void`
    Void,
    /// `CXType_Bool`
    Bool,
    /// `CXType_Char_U` (name kept in libclang's spelling, hence the lint exemption).
    #[allow(non_camel_case_types)]
    Char_U,
    /// `CXType_UChar`
    UChar,
    /// `CXType_Char16`
    Char16,
    /// `CXType_Char32`
    Char32,
    /// `CXType_UShort`
    UShort,
    /// `CXType_UInt`
    UInt,
    /// `CXType_ULong`
    ULong,
    /// `CXType_ULongLong`
    ULongLong,
    /// `CXType_Char_S` (name kept in libclang's spelling, hence the lint exemption).
    #[allow(non_camel_case_types)]
    Char_S,
    /// `CXType_SChar`
    SChar,
    /// `CXType_WChar`
    WChar,
    /// `CXType_Short`
    Short,
    /// `CXType_Int`
    Int,
    /// `CXType_Long`
    Long,
    /// `CXType_LongLong`
    LongLong,
    /// `CXType_Float`
    Float,
    /// `CXType_Double`
    Double,
    /// `CXType_Pointer`
    Pointer,
    /// `CXType_LValueReference`
    LValueReference,
    /// `CXType_Record`
    Record,
    /// `CXType_Enum`
    Enum,
    /// `CXType_Typedef`
    Typedef,
    /// `CXType_ConstantArray`
    ConstantArray,
    /// `CXType_Elaborated`
    Elaborated,
    /// Any type kind not listed above.
    Other,
}
402
403pub struct Type<'a> {
404    type_: CXType,
405    _marker: PhantomData<&'a ()>,
406}
407
408impl<'a> Type<'a> {
409    unsafe fn from_raw(type_: CXType) -> Type<'a> {
410        Type {
411            type_,
412            _marker: PhantomData,
413        }
414    }
415
416    pub fn kind(&self) -> TypeKind {
417        #[allow(non_upper_case_globals)]
418        match self.type_.kind {
419            CXType_Void => TypeKind::Void,
420            CXType_Bool => TypeKind::Bool,
421            CXType_Char_U => TypeKind::Char_U,
422            CXType_UChar => TypeKind::UChar,
423            CXType_Char16 => TypeKind::Char16,
424            CXType_Char32 => TypeKind::Char32,
425            CXType_UShort => TypeKind::UShort,
426            CXType_UInt => TypeKind::UInt,
427            CXType_ULong => TypeKind::ULong,
428            CXType_ULongLong => TypeKind::ULongLong,
429            CXType_Char_S => TypeKind::Char_S,
430            CXType_SChar => TypeKind::SChar,
431            CXType_WChar => TypeKind::WChar,
432            CXType_Short => TypeKind::Short,
433            CXType_Int => TypeKind::Int,
434            CXType_Long => TypeKind::Long,
435            CXType_LongLong => TypeKind::LongLong,
436            CXType_Float => TypeKind::Float,
437            CXType_Double => TypeKind::Double,
438            CXType_Pointer => TypeKind::Pointer,
439            CXType_LValueReference => TypeKind::LValueReference,
440            CXType_Record => TypeKind::Record,
441            CXType_Enum => TypeKind::Enum,
442            CXType_Typedef => TypeKind::Typedef,
443            CXType_ConstantArray => TypeKind::ConstantArray,
444            CXType_Elaborated => TypeKind::Elaborated,
445            _ => TypeKind::Other,
446        }
447    }
448
449    pub fn is_const(&self) -> bool {
450        unsafe { clang_isConstQualifiedType(self.type_) != 0 }
451    }
452
453    pub fn size(&self) -> usize {
454        unsafe { clang_Type_getSizeOf(self.type_) as usize }
455    }
456
457    #[allow(unused)]
458    pub fn name(&self) -> StringRef<'a> {
459        unsafe { StringRef::from_raw(clang_getTypeSpelling(self.type_)) }
460    }
461
462    pub fn declaration(&self) -> Cursor<'a> {
463        unsafe { Cursor::from_raw(clang_getTypeDeclaration(self.type_)) }
464    }
465
466    pub fn canonical_type(&self) -> Type<'a> {
467        unsafe { Type::from_raw(clang_getCanonicalType(self.type_)) }
468    }
469
470    pub fn pointee(&self) -> Option<Type<'a>> {
471        let pointee = unsafe { clang_getPointeeType(self.type_) };
472        if pointee.kind == CXType_Invalid {
473            None
474        } else {
475            Some(unsafe { Type::from_raw(pointee) })
476        }
477    }
478
479    pub fn typedef_name(&self) -> Option<StringRef<'a>> {
480        let name = unsafe { StringRef::from_raw(clang_getTypedefName(self.type_)) };
481        if name.to_bytes().is_empty() {
482            None
483        } else {
484            Some(name)
485        }
486    }
487
488    pub fn array_size(&self) -> Option<usize> {
489        let size = unsafe { clang_getArraySize(self.type_) };
490        if size == -1 {
491            None
492        } else {
493            Some(size as usize)
494        }
495    }
496
497    pub fn array_element_type(&self) -> Option<Type<'a>> {
498        let element_type = unsafe { clang_getArrayElementType(self.type_) };
499        if element_type.kind == CXType_Invalid {
500            None
501        } else {
502            Some(unsafe { Type::from_raw(element_type) })
503        }
504    }
505
506    pub fn named_type(&self) -> Option<Type<'a>> {
507        let named_type = unsafe { clang_Type_getNamedType(self.type_) };
508        if named_type.kind == CXType_Invalid {
509            None
510        } else {
511            Some(unsafe { Type::from_raw(named_type) })
512        }
513    }
514}
515
516pub struct Location<'a> {
517    location: CXSourceLocation,
518    _marker: PhantomData<&'a ()>,
519}
520
521impl<'a> Location<'a> {
522    unsafe fn from_raw(location: CXSourceLocation) -> Location<'a> {
523        Location {
524            location,
525            _marker: PhantomData,
526        }
527    }
528
529    pub fn file_location(&self) -> FileLocation<'a> {
530        let mut file = ptr::null_mut();
531        let mut line = 0;
532        let mut column = 0;
533        let mut offset = 0;
534        unsafe {
535            clang_getFileLocation(
536                self.location,
537                &mut file,
538                &mut line,
539                &mut column,
540                &mut offset,
541            );
542        }
543
544        FileLocation {
545            file,
546            line,
547            column,
548            offset,
549            _marker: PhantomData,
550        }
551    }
552}
553
554impl<'a> Display for Location<'a> {
555    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
556        let file_location = self.file_location();
557
558        if let Some(filename) = file_location.file_name() {
559            write!(
560                f,
561                "{}:{}:{}",
562                filename.to_str().unwrap(),
563                file_location.line(),
564                file_location.column()
565            )?;
566        } else {
567            write!(f, "<unknown location>")?;
568        }
569
570        Ok(())
571    }
572}
573
574pub struct FileLocation<'a> {
575    file: CXFile,
576    line: c_uint,
577    column: c_uint,
578    offset: c_uint,
579    _marker: PhantomData<&'a ()>,
580}
581
582impl<'a> FileLocation<'a> {
583    pub fn file_name(&self) -> Option<StringRef<'a>> {
584        if self.file.is_null() {
585            return None;
586        }
587
588        unsafe { Some(StringRef::from_raw(clang_getFileName(self.file))) }
589    }
590
591    pub fn line(&self) -> c_uint {
592        self.line
593    }
594
595    pub fn column(&self) -> c_uint {
596        self.column
597    }
598}
599
600pub struct StringRef<'a> {
601    string: CXString,
602    _marker: PhantomData<&'a ()>,
603}
604
605impl<'a> StringRef<'a> {
606    unsafe fn from_raw(string: CXString) -> StringRef<'a> {
607        StringRef {
608            string,
609            _marker: PhantomData,
610        }
611    }
612}
613
614impl<'a> Deref for StringRef<'a> {
615    type Target = CStr;
616
617    fn deref(&self) -> &CStr {
618        unsafe { CStr::from_ptr(clang_getCString(self.string)) }
619    }
620}
621
622impl<'a> Drop for StringRef<'a> {
623    fn drop(&mut self) {
624        unsafe {
625            clang_disposeString(self.string);
626        }
627    }
628}
629
/// The subset of libclang evaluation-result kinds this module distinguishes.
///
/// Every kind without a dedicated variant is reported as [`EvalResultKind::Other`].
/// The derives match the other kind enums in this module so values can be compared with `==`
/// and printed with `{:?}`.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum EvalResultKind {
    /// `CXEval_Int`
    Int,
    /// `CXEval_Float`
    Float,
    /// `CXEval_StrLiteral`
    StrLiteral,
    /// Any evaluation-result kind not listed above.
    Other,
}
636
637pub struct EvalResult<'a> {
638    result: CXEvalResult,
639    _marker: PhantomData<&'a ()>,
640}
641
642impl<'a> EvalResult<'a> {
643    unsafe fn from_raw(result: CXEvalResult) -> EvalResult<'a> {
644        EvalResult {
645            result,
646            _marker: PhantomData,
647        }
648    }
649
650    pub fn kind(&self) -> EvalResultKind {
651        let kind = unsafe { clang_EvalResult_getKind(self.result) };
652
653        #[allow(non_upper_case_globals)]
654        match kind {
655            CXEval_Int => EvalResultKind::Int,
656            CXEval_Float => EvalResultKind::Float,
657            CXEval_StrLiteral => EvalResultKind::StrLiteral,
658            _ => EvalResultKind::Other,
659        }
660    }
661
662    pub fn is_unsigned_int(&self) -> bool {
663        unsafe { clang_EvalResult_isUnsignedInt(self.result) != 0 }
664    }
665
666    pub fn as_unsigned(&self) -> c_ulonglong {
667        unsafe { clang_EvalResult_getAsUnsigned(self.result) }
668    }
669
670    pub fn as_long_long(&self) -> c_longlong {
671        unsafe { clang_EvalResult_getAsLongLong(self.result) }
672    }
673
674    pub fn as_double(&self) -> f64 {
675        unsafe { clang_EvalResult_getAsDouble(self.result) }
676    }
677
678    pub fn as_str(&self) -> Option<&CStr> {
679        unsafe {
680            let ptr = clang_EvalResult_getAsStr(self.result);
681            if !ptr.is_null() {
682                Some(CStr::from_ptr(ptr))
683            } else {
684                None
685            }
686        }
687    }
688}
689
690impl<'a> Drop for EvalResult<'a> {
691    fn drop(&mut self) {
692        unsafe {
693            clang_EvalResult_dispose(self.result);
694        }
695    }
696}
697
698pub struct Tokens<'a> {
699    unit: CXTranslationUnit,
700    ptr: *mut CXToken,
701    len: usize,
702    _marker: PhantomData<&'a ()>,
703}
704
705impl<'a> Tokens<'a> {
706    unsafe fn from_raw(unit: CXTranslationUnit, ptr: *mut CXToken, len: usize) -> Tokens<'a> {
707        Tokens {
708            unit,
709            ptr,
710            len,
711            _marker: PhantomData,
712        }
713    }
714
715    pub fn len(&self) -> usize {
716        self.len
717    }
718
719    pub fn get(&self, index: usize) -> Option<Token<'a>> {
720        unsafe {
721            let slice = slice::from_raw_parts(self.ptr, self.len as usize);
722            slice.get(index).map(|t| Token::from_raw(self.unit, *t))
723        }
724    }
725}
726
727impl<'a> Drop for Tokens<'a> {
728    fn drop(&mut self) {
729        unsafe {
730            clang_disposeTokens(self.unit, self.ptr, self.len.try_into().unwrap());
731        }
732    }
733}
734
735pub struct Token<'a> {
736    unit: CXTranslationUnit,
737    token: CXToken,
738    _marker: PhantomData<&'a ()>,
739}
740
741impl<'a> Token<'a> {
742    unsafe fn from_raw(unit: CXTranslationUnit, token: CXToken) -> Token<'a> {
743        Token {
744            unit,
745            token,
746            _marker: PhantomData,
747        }
748    }
749
750    pub fn spelling(&self) -> StringRef<'a> {
751        unsafe { StringRef::from_raw(clang_getTokenSpelling(self.unit, self.token)) }
752    }
753}