fob_graph/semantic/
mod.rs

1//! Semantic analysis engine using Oxc for symbol extraction.
2//!
3//! This module provides the core functionality for analyzing JavaScript/TypeScript
4//! files to extract symbol information, detect unused declarations, and identify
5//! unreachable code.
6
7mod analysis;
8mod detection;
9mod utils;
10mod visitor;
11
12use super::symbol::{SymbolTable, UnreachableCode};
13use super::{ModuleId, SourceType};
14use crate::Result;
15
16use analysis::extract_symbols_from_program;
17use detection::detect_unreachable_with_visitor;
18
19/// Analyzes JavaScript/TypeScript source code to extract symbol information.
20///
21/// This function uses Oxc's semantic analyzer to build a complete symbol table containing
22/// all declared symbols (variables, functions, classes, types, etc.) along with their
23/// usage statistics.
24///
25/// # What it does
26///
27/// - **Symbol Extraction**: Identifies all declarations (let, const, var, function, class, etc.)
28/// - **Reference Counting**: Tracks how many times each symbol is read or written
29/// - **Scope Analysis**: Maintains scope hierarchy information
30/// - **TypeScript Support**: Handles interfaces, type aliases, enums, and other TS constructs
31/// - **Symbol Classification**: Categorizes symbols by kind (Variable, Function, Class, etc.)
32///
33/// # Arguments
34///
35/// * `source_text` - The JavaScript/TypeScript source code to analyze
36/// * `filename` - The filename (used for error reporting and source type detection)
37/// * `source_type` - The type of source file (JavaScript, TypeScript, JSX, TSX)
38///
39/// # Returns
40///
41/// Returns `Ok(SymbolTable)` containing all symbols found. If parsing fails (syntax errors),
42/// returns `Ok(empty_table)` to allow analysis to continue for other files.
43///
44/// # Error Handling
45///
46/// This function uses graceful degradation:
47/// - Parse errors result in an empty symbol table, not an error
48/// - Non-JavaScript files (CSS, JSON, etc.) return empty tables
49/// - This ensures that analysis can continue even if some files have issues
50///
51/// # Examples
52///
53/// ## Basic usage
54///
55/// ```rust,ignore
56/// use fob_graph::graph::semantic::analyze_symbols;
57/// use fob_graph::graph::SourceType;
58///
59/// let source = r#"
60///     const unused = 42;
61///     const used = 100;
62///     console.log(used);
63/// "#;
64///
65/// let table = analyze_symbols(source, "example.js", SourceType::JavaScript)?;
66/// assert_eq!(table.symbols.len(), 2);
67///
68/// // Check for unused symbols
69/// let unused_symbols = table.unused_symbols();
70/// assert_eq!(unused_symbols.len(), 1);
71/// assert_eq!(unused_symbols[0].name, "unused");
72/// ```
73///
74/// ## TypeScript analysis
75///
76/// ```rust,ignore
77/// let source = r#"
78///     interface User {
79///         name: string;
80///     }
81///
82///     type UserId = string;
83///
84///     function getUser(id: UserId): User {
85///         return { name: "test" };
86///     }
87/// "#;
88///
89/// let table = analyze_symbols(source, "types.ts", SourceType::TypeScript)?;
90///
91/// // Should find interface, type alias, and function
92/// assert!(table.symbols.len() >= 3);
93/// ```
94///
95/// # Performance
96///
97/// This function parses the source code using Oxc's fast parser and builds semantic
98/// information in a single pass. For typical JavaScript files, analysis completes
99/// in microseconds.
100///
101/// # Security
102///
103/// - All input is validated through Oxc's parser
104/// - No code execution occurs - purely static analysis
105/// - Safe to use on untrusted input
106pub fn analyze_symbols(
107    source: &str,
108    filename: &str,
109    source_type: SourceType,
110) -> Result<SymbolTable> {
111    // Handle non-JavaScript files
112    if !source_type.is_javascript_like() {
113        return Ok(SymbolTable::new());
114    }
115
116    // Convert our SourceType to Oxc's SourceType
117    use utils::convert_source_type;
118    let oxc_source_type = convert_source_type(source_type, filename);
119
120    // Create allocator for Oxc's arena-based allocation
121    use crate::oxc::{Allocator, Parser};
122    let allocator = Allocator::default();
123
124    // Parse the source code
125    let parser_return = Parser::new(&allocator, source, oxc_source_type).parse();
126
127    // If there are parse errors, return empty table (graceful degradation)
128    if !parser_return.errors.is_empty() {
129        return Ok(SymbolTable::new());
130    }
131
132    // Extract symbols from the parsed program
133    let mut table = extract_symbols_from_program(&parser_return.program, source);
134
135    // Extract class and enum members (which aren't tracked by Oxc's symbol table)
136    super::class_enum_extraction::extract_class_and_enum_members(
137        &parser_return.program,
138        source,
139        &mut table,
140    );
141
142    // Calculate code quality metrics for functions and classes
143    super::quality::calculate_quality_metrics(&parser_return.program, source, &mut table);
144
145    Ok(table)
146}
147
148/// Detect unreachable code in a JavaScript/TypeScript file.
149///
150/// This function uses a simple AST-based approach to detect code that appears
151/// after control flow terminators (return, throw, break, continue) in the same
152/// block. This is a simplified analysis and does not use full CFG.
153///
154/// # Arguments
155///
156/// * `source_text` - The source code to analyze
157/// * `filename` - The filename (for reporting)
158/// * `source_type` - The type of source file
159/// * `module_id` - The module ID for the unreachable code entries
160///
161/// # Returns
162///
163/// Returns a vector of `UnreachableCode` entries, one for each unreachable
164/// statement detected.
165pub fn detect_unreachable_code(
166    source_text: &str,
167    filename: &str,
168    source_type: SourceType,
169    module_id: ModuleId,
170) -> Result<Vec<UnreachableCode>> {
171    // Handle non-JavaScript files
172    if !source_type.is_javascript_like() {
173        return Ok(Vec::new());
174    }
175
176    // Convert our SourceType to Oxc's SourceType
177    use utils::convert_source_type;
178    let oxc_source_type = convert_source_type(source_type, filename);
179
180    // Create allocator for Oxc's arena-based allocation
181    use crate::oxc::{Allocator, Parser};
182    let allocator = Allocator::default();
183
184    // Parse the source code
185    let parser_return = Parser::new(&allocator, source_text, oxc_source_type).parse();
186
187    // If there are parse errors, return empty (graceful degradation)
188    if !parser_return.errors.is_empty() {
189        return Ok(Vec::new());
190    }
191
192    // Detect unreachable code using visitor
193    let unreachable =
194        detect_unreachable_with_visitor(&parser_return.program, source_text, module_id);
195
196    Ok(unreachable)
197}
198
#[cfg(test)]
mod tests {
    use super::super::symbol::SymbolKind;
    use super::*;

    /// Analyzes `source` as the JavaScript file `test.js`, panicking on error.
    fn analyze_js(source: &str) -> SymbolTable {
        analyze_symbols(source, "test.js", SourceType::JavaScript).expect("analysis failed")
    }

    /// Analyzes `source` as the TypeScript file `test.ts`, panicking on error.
    fn analyze_ts(source: &str) -> SymbolTable {
        analyze_symbols(source, "test.ts", SourceType::TypeScript).expect("analysis failed")
    }

    /// Runs unreachable-code detection on `source` as the JavaScript module `test.js`.
    fn unreachable_in(source: &str) -> Vec<UnreachableCode> {
        let module_id = ModuleId::new("test.js").expect("valid module id");
        detect_unreachable_code(source, "test.js", SourceType::JavaScript, module_id)
            .expect("detection failed")
    }

    #[test]
    fn test_analyze_simple_symbols() {
        let table = analyze_js(
            r#"
            const used = 42;
            const unused = 100;
            console.log(used);
        "#,
        );

        // Both the 'used' and the 'unused' variable must be present.
        assert!(table.symbols.len() >= 2);

        // 'used' is passed to console.log, so it must have been read.
        let used = table.symbols_by_name("used");
        assert_eq!(used.len(), 1);
        assert!(used[0].read_count > 0, "used should have read references");
    }

    #[test]
    fn test_analyze_functions() {
        let table = analyze_js(
            r#"
            function usedFunction() {
                return 42;
            }

            function unusedFunction() {
                return 100;
            }

            usedFunction();
        "#,
        );

        // The function that is called is found, classified and referenced.
        let used_fn = table.symbols_by_name("usedFunction");
        assert_eq!(used_fn.len(), 1);
        assert_eq!(used_fn[0].kind, SymbolKind::Function);
        assert!(used_fn[0].read_count > 0);

        // The function that is never called is still found and classified.
        let unused_fn = table.symbols_by_name("unusedFunction");
        assert_eq!(unused_fn.len(), 1);
        assert_eq!(unused_fn[0].kind, SymbolKind::Function);
    }

    #[test]
    fn test_analyze_typescript() {
        let table = analyze_ts(
            r#"
            interface User {
                name: string;
            }

            type UserId = string;

            const user: User = { name: "test" };
        "#,
        );

        // The interface, the type alias and the variable are each found exactly once.
        for name in ["User", "UserId", "user"] {
            assert_eq!(table.symbols_by_name(name).len(), 1);
        }
    }

    #[test]
    fn test_graceful_parse_error_handling() {
        let invalid_source = r#"
            const x = {{{{{ invalid syntax
        "#;

        // A syntax error must neither panic nor surface as Err.
        let outcome = analyze_symbols(invalid_source, "invalid.js", SourceType::JavaScript);
        let table = outcome.expect("should handle parse errors gracefully");

        assert!(
            table.is_empty(),
            "should return empty table for invalid syntax"
        );
    }

    #[test]
    fn test_non_javascript_files() {
        let outcome = analyze_symbols("body { color: red; }", "styles.css", SourceType::Css);
        let table = outcome.expect("should handle non-JS files");

        assert!(table.is_empty(), "should return empty table for CSS files");
    }

    #[test]
    fn test_line_column_calculation() {
        use super::utils::LineIndex;

        let source = "line 1\nline 2\nline 3";
        let index = LineIndex::new(source);

        // Offset 0 is the start of the file.
        assert_eq!(index.get_line_column(0, source), (1, 0));

        // Offset 7 is the first byte after the first '\n', i.e. the start of line 2.
        assert_eq!(index.get_line_column(7, source), (2, 0));

        // Offset 14 is the first byte after the second '\n', i.e. the start of line 3.
        assert_eq!(index.get_line_column(14, source), (3, 0));
    }

    #[test]
    fn test_line_column_offset_zero() {
        use super::utils::LineIndex;

        // Offset 0 maps to line 1, column 0 for an empty string, for a single
        // character, and for text that begins with a newline.
        for text in ["", "x", "\nline 2"] {
            let index = LineIndex::new(text);
            assert_eq!(index.get_line_column(0, text), (1, 0));
        }
    }

    #[test]
    fn test_symbol_spans() {
        let table = analyze_js(
            r#"
const x = 1;
function f() {}
"#,
        );

        // Line numbers are 1-based, so every declaration span needs a positive line.
        for symbol in &table.symbols {
            assert!(symbol.declaration_span.line > 0, "line should be positive");
        }
    }

    #[test]
    fn test_scope_tracking() {
        let table = analyze_js(
            r#"
            const global = 1;
            function outer() {
                const local = 2;
                function inner() {
                    const nested = 3;
                }
            }
        "#,
        );

        // Nested functions must show up as more than one scope.
        assert!(table.scope_count > 1, "should detect multiple scopes");
    }

    #[test]
    fn test_unused_variable_detection() {
        let table = analyze_js(
            r#"
            function example() {
                const used = 42;
                const unused = 100;
                return used;
            }
        "#,
        );

        let used = table.symbols_by_name("used");
        let unused = table.symbols_by_name("unused");

        assert_eq!(used.len(), 1, "should find 'used' symbol");
        assert_eq!(unused.len(), 1, "should find 'unused' symbol");

        // The returned variable counts as read ...
        assert!(used[0].read_count > 0, "used should be read");

        // ... while the variable that is only declared does not.
        assert_eq!(unused[0].read_count, 0, "unused should not be read");
    }

    #[test]
    fn test_used_function_has_reads() {
        let table = analyze_js(
            r#"
            function helper() { return 42; }
            const x = helper();
        "#,
        );

        let helper = table.symbols_by_name("helper");
        assert_eq!(helper.len(), 1, "should find 'helper' function");
        assert!(helper[0].read_count > 0, "helper() should be called");
    }

    #[test]
    fn test_typescript_types() {
        let table = analyze_ts(
            r#"
            interface User {
                name: string;
            }

            type UserId = string;

            const user: User = { name: "test" };
        "#,
        );

        // The interface declaration is classified as an interface.
        let interface_sym = table.symbols_by_name("User");
        assert_eq!(interface_sym.len(), 1);
        assert_eq!(interface_sym[0].kind, SymbolKind::Interface);

        // The type alias declaration is classified as a type alias.
        let type_sym = table.symbols_by_name("UserId");
        assert_eq!(type_sym.len(), 1);
        assert_eq!(type_sym[0].kind, SymbolKind::TypeAlias);

        // The annotated constant is classified as a plain variable.
        let var_sym = table.symbols_by_name("user");
        assert_eq!(var_sym.len(), 1);
        assert_eq!(var_sym[0].kind, SymbolKind::Variable);
    }

    #[test]
    fn test_unreachable_after_return() {
        let unreachable = unreachable_in(
            r#"
            function example() {
                return true;
                console.log('unreachable');
            }
        "#,
        );

        assert!(!unreachable.is_empty(), "should detect unreachable code");

        // The first finding's description has to name the terminating statement.
        let desc = &unreachable[0].description;
        assert!(desc.contains("return"), "should mention return statement");
    }

    #[test]
    fn test_unreachable_after_throw() {
        let unreachable = unreachable_in(
            r#"
            function example() {
                throw new Error('test');
                console.log('unreachable');
            }
        "#,
        );

        assert!(
            !unreachable.is_empty(),
            "should detect unreachable code after throw"
        );
    }

    #[test]
    fn test_no_unreachable_when_none() {
        let unreachable = unreachable_in(
            r#"
            function example() {
                console.log('reachable');
                return true;
            }
        "#,
        );

        assert_eq!(
            unreachable.len(),
            0,
            "should not detect unreachable code when none exists"
        );
    }

    #[test]
    fn test_class_symbol_kind() {
        let table = analyze_js(
            r#"
            class MyClass {
                method() {}
            }
        "#,
        );

        let class_sym = table.symbols_by_name("MyClass");
        assert_eq!(class_sym.len(), 1);
        assert_eq!(class_sym[0].kind, SymbolKind::Class);
    }
}