Skip to main content

thrift_analyzer/analyzer/
mod.rs

1//! Thrift Analyzer.
2
3pub mod ast;
4pub mod base;
5pub mod macros;
6pub mod parser;
7pub mod scanner;
8pub mod symbol;
9pub mod token;
10
11use std::{
12    collections::{HashMap, HashSet},
13    fs, io,
14    path::{Path, PathBuf},
15    rc::Rc,
16};
17
18use ast::{DefinitionNode, FieldNode, FieldTypeNode, FunctionNode, HeaderNode};
19use base::{Location, Position};
20
21use crate::analyzer::{
22    ast::{DocumentNode, IdentifierNode, Node},
23    base::Error,
24    parser::Parser,
25    symbol::SymbolTable,
26};
27
28/// Analyzer for Thrift files.
29pub struct Analyzer {
30    documents: HashMap<String, Vec<char>>,
31
32    document_nodes: HashMap<String, Rc<DocumentNode>>,
33    symbol_tables: HashMap<String, Rc<SymbolTable>>,
34
35    errors: HashMap<String, Vec<Error>>,
36    semantic_tokens: HashMap<String, Vec<u32>>,
37
38    pub(crate) wasm_read_file: Option<Box<dyn Fn(String) -> io::Result<String>>>,
39}
40
/// Thrift IDL keywords offered by [`Analyzer::keywords_for_completion`].
///
/// Covers header and definition keywords, field/function modifiers, and the
/// built-in base and container types, so that completion can also propose
/// `string`, `double`, `binary`, `map`, `list`, `set` and `throws`.
const KEYWORDS: &[&str] = &[
    // headers
    "namespace",
    "include",
    "cpp_include",
    // definitions
    "const",
    "typedef",
    "extends",
    // field and function modifiers
    "required",
    "optional",
    "oneway",
    "throws",
    // base types
    "void",
    "bool",
    "byte",
    "i8",
    "i16",
    "i32",
    "i64",
    "double",
    "string",
    "binary",
    // container types
    "map",
    "list",
    "set",
    // compound definitions
    "struct",
    "enum",
    "union",
    "exception",
    "service",
];
64
65impl Analyzer {
66    /// Create a new analyzer.
67    pub fn new() -> Self {
68        Self {
69            documents: HashMap::new(),
70            document_nodes: HashMap::new(),
71            symbol_tables: HashMap::new(),
72            errors: HashMap::new(),
73            semantic_tokens: HashMap::new(),
74            wasm_read_file: None,
75        }
76    }
77
78    /// Sync a document.
79    pub fn sync_document(&mut self, path: &str, content: &str) {
80        self.documents
81            .insert(path.to_string(), content.chars().collect());
82        self.analyze(path);
83    }
84
85    /// Remove a document.
86    pub fn remove_document(&mut self, path: &str) {
87        self.documents.remove(path);
88        self.document_nodes.remove(path);
89        self.symbol_tables.remove(path);
90        self.errors.remove(path);
91        self.semantic_tokens.remove(path);
92    }
93
94    /// Get the errors for all files.
95    pub fn errors(&self) -> &HashMap<String, Vec<Error>> {
96        &self.errors
97    }
98
99    /// Get semantic tokens for a specific file.
100    pub fn semantic_tokens(&self, path: &str) -> Option<&Vec<u32>> {
101        self.semantic_tokens.get(path)
102    }
103
104    /// Get the semantic token types.
105    pub fn semantic_token_types(&self) -> Vec<String> {
106        vec!["type".to_string(), "function".to_string()]
107    }
108
109    /// Get the semantic token modifiers.
110    pub fn semantic_token_modifiers(&self) -> Vec<String> {
111        vec![]
112    }
113
114    /// Get the definition at a specific position.
115    pub fn definition(&self, path: &str, pos: Position) -> Option<Location> {
116        let document_node = self.document_nodes.get(path)?.as_ref();
117        let identifier = self.find_identifier(document_node, pos)?;
118        let symbol_table = self.symbol_tables.get(path)?;
119        let (new_path, def, header) =
120            symbol_table.find_definition_of_identifier_type(identifier)?;
121
122        if identifier.position_in_namespace(pos) {
123            if let Some(include) = header {
124                return Some(Location {
125                    path: path.to_string(),
126                    range: include.range(),
127                });
128            }
129            return None;
130        }
131
132        Some(Location {
133            path: new_path,
134            range: def.identifier().range(),
135        })
136    }
137
138    /// Get the types for completion.
139    pub fn types_for_completion(&self, path: &str, pos: Position) -> Vec<String> {
140        let offset = match self.offset_at_position(path, pos) {
141            Some(offset) => offset,
142            None => return vec![],
143        };
144        let document = match self.documents.get(path) {
145            Some(document) => document,
146            None => return vec![],
147        };
148        let mut symbol_table = match self.symbol_tables.get(path) {
149            Some(symbol_table) => symbol_table.clone(),
150            None => return vec![],
151        };
152
153        if offset > 0 && document[offset - 1] == '.' {
154            let word = match self.idet_prev_offset(path, offset - 1) {
155                Some(word) => word,
156                None => return vec!["".to_string()],
157            };
158            let table = match symbol_table.includes().get(&word) {
159                Some(table) => table.clone(),
160                None => return vec!["".to_string()],
161            };
162            symbol_table = table;
163        }
164
165        return symbol_table.types().keys().cloned().collect();
166    }
167
168    /// Get the includes for completion.
169    pub fn includes_for_completion(&self, path: &str, _pos: Position) -> Vec<String> {
170        let symbol_table = match self.symbol_tables.get(path) {
171            Some(symbol_table) => symbol_table,
172            None => return vec![],
173        };
174
175        symbol_table.includes().keys().cloned().collect()
176    }
177
178    /// Get the keywords for completion.
179    pub fn keywords_for_completion(&self) -> Vec<String> {
180        KEYWORDS.iter().map(|s| s.to_string()).collect()
181    }
182}
183
184impl Analyzer {
185    /// Analyze a document.
186    fn analyze(&mut self, path: &str) {
187        // clear previous state
188        self.document_nodes.remove(path);
189        self.symbol_tables.remove(path);
190        self.errors.remove(path);
191        self.semantic_tokens.remove(path);
192
193        let mut visited = HashSet::new();
194        self.parse_document(path, &mut visited, None);
195        self.static_check(path);
196        self.generate_semantic_tokens(path);
197    }
198
199    /// Recursively parse AST and build symbol tables for a file.
200    fn parse_document(
201        &mut self,
202        path: &str,
203        visited: &mut HashSet<String>,
204        source: Option<(&str, &Rc<HeaderNode>)>,
205    ) -> bool {
206        // check for circular dependencies
207        if visited.contains(path) {
208            if let Some((source_path, node)) = source {
209                let error = Error {
210                    range: node.range(),
211                    message: format!("Circular dependency detected: {}", path),
212                };
213
214                self.errors
215                    .entry(source_path.to_string())
216                    .or_default()
217                    .push(error);
218            }
219            return false;
220        }
221
222        // mark file as being processed
223        visited.insert(path.to_string());
224
225        // if file is already parsed, return
226        if self.document_nodes.contains_key(path) {
227            return true;
228        }
229
230        // read the file
231        let content = if let Some(content) = self.documents.get(path) {
232            content
233        } else {
234            // try to read from local file system
235            match self.read_file(path) {
236                Ok(content) => &content.chars().collect(),
237                Err(e) => {
238                    if let Some((source_path, node)) = source {
239                        let error = Error {
240                            range: node.range(),
241                            message: format!("Failed to read file {}: {}", path, e),
242                        };
243
244                        self.errors
245                            .entry(source_path.to_string())
246                            .or_default()
247                            .push(error);
248                    }
249                    return false;
250                }
251            }
252        };
253
254        // parse the file
255        let (document_node, errors) = Parser::new(content).parse();
256
257        // store parser errors
258        self.errors
259            .entry(path.to_string())
260            .or_default()
261            .extend(errors.into_iter().map(|e| e));
262
263        // track file dependencies
264        let mut dependencies = Vec::new();
265        for header in &document_node.headers {
266            if let HeaderNode::Include(include) = header.as_ref() {
267                if let Some(parent) = path_parent(path) {
268                    dependencies.push((
269                        parent.join(&include.literal).to_string_lossy().to_string(),
270                        header.clone(),
271                    ));
272                }
273            }
274        }
275
276        // build symbol table
277        let mut symbol_table = SymbolTable::new_from_ast(path, &document_node);
278
279        // recursively parse dependencies
280        for (dep_path, header) in dependencies.iter() {
281            let res = self.parse_document(dep_path, visited, Some((path, header)));
282            visited.remove(dep_path.as_str());
283            if !res {
284                continue;
285            }
286
287            // add dependency to current symbol table
288            if let Some(dep_table) = self.symbol_tables.get(dep_path) {
289                symbol_table.add_dependency(dep_path, header.clone(), dep_table.clone());
290            }
291        }
292
293        // store document
294        self.symbol_tables
295            .insert(path.to_string(), Rc::new(symbol_table));
296        self.document_nodes
297            .insert(path.to_string(), Rc::new(document_node));
298
299        true
300    }
301}
302
303/// Static check
304impl Analyzer {
305    fn static_check(&mut self, path: &str) {
306        let document_node = match self.document_nodes.get(path) {
307            Some(document_node) => document_node.clone(),
308            None => return,
309        };
310        let symbol_table = match self.symbol_tables.get_mut(path) {
311            Some(symbol_table) => symbol_table.clone(),
312            None => return,
313        };
314
315        // type check
316        symbol_table.check_document_types(document_node.as_ref());
317        self.errors
318            .entry(path.to_string())
319            .or_default()
320            .extend(symbol_table.errors().into_iter().map(|e| e));
321
322        // field check
323        self.document_check(path, document_node.as_ref());
324    }
325
326    fn document_check(&mut self, path: &str, document_node: &DocumentNode) {
327        for definition in &document_node.definitions {
328            match definition.as_ref() {
329                DefinitionNode::Struct(struct_node) => {
330                    self.fields_check(path, &struct_node.fields);
331                }
332                DefinitionNode::Union(union_node) => {
333                    self.fields_check(path, &union_node.fields);
334                }
335                DefinitionNode::Exception(exception_node) => {
336                    self.fields_check(path, &exception_node.fields);
337                }
338                DefinitionNode::Service(service_node) => {
339                    self.functions_check(path, &service_node.functions);
340                }
341                _ => {}
342            }
343        }
344    }
345
346    fn fields_check(&mut self, path: &str, fields: &[FieldNode]) {
347        let mut field_ids = HashSet::new();
348        let mut field_identifiers = HashSet::new();
349
350        for field in fields {
351            if let Some(field_id) = &field.field_id {
352                if field_ids.contains(&field_id.id) {
353                    let error = Error {
354                        range: field_id.range.clone(),
355                        message: format!("Duplicate field ID: {}", field_id.id),
356                    };
357                    self.errors.entry(path.to_string()).or_default().push(error);
358                } else {
359                    field_ids.insert(field_id.id);
360                }
361            }
362
363            let identifier_name = &field.identifier.name;
364            if field_identifiers.contains(identifier_name) {
365                let error = Error {
366                    range: field.identifier.range.clone(),
367                    message: format!("Duplicate field identifier: {}", identifier_name),
368                };
369                self.errors.entry(path.to_string()).or_default().push(error);
370            } else {
371                field_identifiers.insert(identifier_name.clone());
372            }
373        }
374    }
375
376    fn functions_check(&mut self, path: &str, functions: &[FunctionNode]) {
377        let mut function_identifiers = HashSet::new();
378
379        for function in functions {
380            self.fields_check(path, &function.fields);
381
382            let identifier_name = &function.identifier.name;
383            if function_identifiers.contains(identifier_name) {
384                let error = Error {
385                    range: function.identifier.range.clone(),
386                    message: format!("Duplicate function identifier: {}", identifier_name),
387                };
388                self.errors.entry(path.to_string()).or_default().push(error);
389            } else {
390                function_identifiers.insert(identifier_name.clone());
391            }
392        }
393    }
394}
395
396/// Semantic tokens
397impl Analyzer {
398    /// Generate semantic tokens for a document.
399    fn generate_semantic_tokens(&mut self, path: &str) {
400        let field_type_identifiers = self.find_field_type_identifiers(path);
401        let function_identifiers = self.find_function_identifiers(path);
402
403        let mut identifiers: Vec<(&IdentifierNode, u32)> = Vec::new();
404        for id in field_type_identifiers {
405            identifiers.push((id, 0));
406        }
407        for id in function_identifiers {
408            identifiers.push((id, 1));
409        }
410
411        let new_tokens = self.convert_identifiers_to_semantic_tokens(identifiers);
412        self.semantic_tokens.insert(path.to_string(), new_tokens);
413    }
414
415    /// Find all IdentifierNode instances used as field types in the document nodes.
416    fn find_field_type_identifiers(&self, path: &str) -> Vec<&IdentifierNode> {
417        let mut result = Vec::new();
418
419        if let Some(document_node) = self.document_nodes.get(path) {
420            for definition in &document_node.definitions {
421                match definition.as_ref() {
422                    DefinitionNode::Const(const_node) => {
423                        result.extend(self.collect_field_type_identifiers(&const_node.field_type));
424                    }
425                    DefinitionNode::Typedef(typedef_node) => {
426                        result.extend(
427                            self.collect_field_type_identifiers(&typedef_node.definition_type),
428                        );
429                        result.push(&typedef_node.identifier);
430                    }
431                    DefinitionNode::Struct(struct_node) => {
432                        for field in &struct_node.fields {
433                            result.extend(self.collect_field_type_identifiers(&field.field_type));
434                        }
435                    }
436                    DefinitionNode::Union(union_node) => {
437                        for field in &union_node.fields {
438                            result.extend(self.collect_field_type_identifiers(&field.field_type));
439                        }
440                    }
441                    DefinitionNode::Exception(exception_node) => {
442                        for field in &exception_node.fields {
443                            result.extend(self.collect_field_type_identifiers(&field.field_type));
444                        }
445                    }
446                    DefinitionNode::Service(service_node) => {
447                        if let Some(extends) = &service_node.extends {
448                            result.push(extends);
449                        }
450
451                        for function in &service_node.functions {
452                            if let Some(function_type) = &function.function_type {
453                                result.extend(self.collect_field_type_identifiers(function_type));
454                            }
455                            for field in &function.fields {
456                                result
457                                    .extend(self.collect_field_type_identifiers(&field.field_type));
458                            }
459                            if let Some(throws) = &function.throws {
460                                for throw in throws {
461                                    result.extend(
462                                        self.collect_field_type_identifiers(&throw.field_type),
463                                    );
464                                }
465                            }
466                        }
467                    }
468
469                    _ => {}
470                }
471            }
472        }
473
474        result
475    }
476
477    /// Collect all IdentifierNode instances used as field types in the document nodes.
478    fn collect_field_type_identifiers<'a>(
479        &'a self,
480        field_type: &'a FieldTypeNode,
481    ) -> Vec<&'a IdentifierNode> {
482        match field_type {
483            FieldTypeNode::Identifier(identifier) => vec![identifier],
484            FieldTypeNode::BaseType(_) => vec![],
485            FieldTypeNode::MapType(map_type) => {
486                let mut result = self.collect_field_type_identifiers(&map_type.key_type);
487                result.extend(self.collect_field_type_identifiers(&map_type.value_type));
488                result
489            }
490            FieldTypeNode::SetType(set_type) => {
491                self.collect_field_type_identifiers(&set_type.type_node)
492            }
493            FieldTypeNode::ListType(list_type) => {
494                self.collect_field_type_identifiers(&list_type.type_node)
495            }
496        }
497    }
498
499    /// Convert a vector of IdentifierNode references to semantic tokens.
500    fn convert_identifiers_to_semantic_tokens(
501        &self,
502        mut identifiers: Vec<(&IdentifierNode, u32)>,
503    ) -> Vec<u32> {
504        identifiers.sort_by_key(|(identifier, _)| identifier.range());
505
506        let mut tokens = Vec::new();
507        let mut prev_line = 0;
508        let mut prev_char = 0;
509
510        for (identifier, token_type) in identifiers {
511            let range = identifier.range();
512
513            // convert to 0-based line and column
514            let line = range.start.line - 1 as u32;
515            let char = range.start.column - 1 as u32;
516            let length = identifier.name.len() as u32;
517
518            // deltaLine: line number relative to the previous token
519            let delta_line = line - prev_line;
520            // deltaStart: start character relative to the previous token
521            let delta_start = if delta_line == 0 {
522                char - prev_char
523            } else {
524                char
525            };
526            // length: length of the token
527            // tokenType: 0 for type, 1 for function (as defined in SemanticTokensLegend)
528            // tokenModifiers: 0 for no modifiers
529            tokens.extend_from_slice(&[delta_line, delta_start, length, token_type, 0]);
530
531            prev_line = line;
532            prev_char = char;
533        }
534
535        tokens
536    }
537
538    /// Find all function identifiers in the document nodes.
539    fn find_function_identifiers(&self, path: &str) -> Vec<&IdentifierNode> {
540        let mut result = Vec::new();
541
542        if let Some(document_node) = self.document_nodes.get(path) {
543            for definition in &document_node.definitions {
544                match definition.as_ref() {
545                    DefinitionNode::Service(service_node) => {
546                        for function in &service_node.functions {
547                            result.push(&function.identifier);
548                        }
549                    }
550                    _ => {}
551                }
552            }
553        }
554
555        result
556    }
557}
558
559/// Definition
560impl Analyzer {
561    /// Find an identifier at a specific position.
562    fn find_identifier<'a>(&self, node: &'a dyn Node, pos: Position) -> Option<&'a IdentifierNode> {
563        if !node.range().contains(pos) {
564            return None;
565        }
566
567        if let Some(identifier) = node.as_any().downcast_ref::<IdentifierNode>() {
568            return Some(identifier);
569        }
570
571        for child in node.children() {
572            if let Some(identifier) = self.find_identifier(child, pos) {
573                return Some(identifier);
574            }
575        }
576
577        None
578    }
579}
580
581/// Completion
582impl Analyzer {
583    /// Get the offset at a specific position.
584    fn offset_at_position(&self, path: &str, pos: Position) -> Option<usize> {
585        let document = self.documents.get(path)?;
586        let mut offset = 0;
587        let mut cur_pos = Position { line: 1, column: 1 };
588
589        while offset < document.len() {
590            if cur_pos >= pos {
591                break;
592            }
593
594            if document[offset] == '\n' {
595                offset += 1;
596                cur_pos.line += 1;
597                cur_pos.column = 1;
598            } else if document[offset] == '\r' {
599                offset += 1;
600                cur_pos.line += 1;
601                cur_pos.column = 1;
602                if offset < document.len() && document[offset] == '\n' {
603                    offset += 1;
604                }
605            } else {
606                offset += 1;
607                cur_pos.column += 1;
608            }
609        }
610
611        if cur_pos == pos {
612            Some(offset)
613        } else {
614            None
615        }
616    }
617
618    /// Get the identifier at the previous offset. no consider the '.'.
619    fn idet_prev_offset(&self, path: &str, offset: usize) -> Option<String> {
620        let document = self.documents.get(path)?;
621
622        Some(
623            document[..offset]
624                .iter()
625                .rev()
626                .take_while(|&&c| c.is_ascii_alphanumeric() || c == '_')
627                .collect::<Vec<_>>()
628                .into_iter()
629                .rev()
630                .collect(),
631        )
632    }
633}
634
635impl Analyzer {
636    fn read_file(&self, path: &str) -> io::Result<String> {
637        if let Some(read_file) = &self.wasm_read_file {
638            read_file(path.to_string())
639        } else {
640            fs::read_to_string(path)
641        }
642    }
643}
644
/// Returns the parent directory of `path`.
///
/// When built for the WASM target on Windows, `Path::new(path).parent()`
/// always returns `""`, so an empty parent is not trusted: the path is cut at
/// its last backslash instead. If there is no backslash either, whatever
/// `Path::parent` reported (possibly `""` or `None`) is returned.
fn path_parent(path: &str) -> Option<PathBuf> {
    let std_parent = Path::new(path).parent();

    match std_parent {
        Some(dir) if !dir.as_os_str().is_empty() => Some(dir.to_path_buf()),
        _ => match path.rfind('\\') {
            Some(separator) => Some(PathBuf::from(&path[..separator])),
            None => std_parent.map(Path::to_path_buf),
        },
    }
}