code2graph 0.0.0-beta.6

Purpose-neutral code-graph extraction: source files → symbols, references, and cross-file edges. Tree-sitter based, no storage opinion.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
// SPDX-License-Identifier: Apache-2.0

//! SQL extractor — extracts DDL symbols (tables, views, columns) and CTE
//! definitions via tree-sitter-sequel.
//!
//! Emits neutral [`FileFacts`] — no storage entries, no source bodies. References
//! (table use-sites in FROM/JOIN/INSERT/UPDATE/DELETE/REFERENCES clauses) are
//! emitted as [`RefRole::TypeRef`] so the language-agnostic resolver can link them
//! to their table/view definitions automatically.
//!
//! Tier-B scope/binding extraction: CTE names introduced by `WITH … AS (…)` are
//! emitted as [`BindingKind::Definition`] bindings in the enclosing statement scope
//! (not scope 0 — the file root). A `FROM revenue` reference inside the same
//! statement therefore resolves with [`Confidence::Scoped`](crate::graph::Confidence::Scoped).

use std::collections::HashMap;

use tree_sitter::{Node, Parser};

use crate::error::{CodegraphError, Result};
use crate::graph::types::{
    Binding, BindingKind, BindingTarget, ByteSpan, FileFacts, RefRole, Reference, Scope, ScopeId,
    ScopeKind, Symbol, SymbolKind, Visibility,
};
use crate::lang::Language;
use crate::symbol::Descriptor;

use super::{
    ExtractCtx, Extractor, attach_reference_scopes, definition_bindings, innermost_scope,
    make_symbol, node_span, push_scope,
};

/// Extracts SQL symbols and references (tables, views, columns).
pub struct SqlExtractor;

impl Extractor for SqlExtractor {
    fn lang(&self) -> Language {
        Language::Sql
    }

    fn extract(&self, source: &str, file: &str) -> Result<FileFacts> {
        let ts_language = crate::grammar::sql();
        let mut parser = Parser::new();
        parser
            .set_language(&ts_language)
            .map_err(|_| CodegraphError::Parse {
                path: file.to_owned(),
            })?;
        let tree = parser
            .parse(source, None)
            .ok_or_else(|| CodegraphError::Parse {
                path: file.to_owned(),
            })?;

        let root = tree.root_node();
        let bytes = source.as_bytes();
        let ctx = ExtractCtx {
            bytes,
            file,
            lang: Language::Sql,
        };

        // DDL symbols (CREATE TABLE / VIEW / columns) — bound at file-root scope (0).
        let defs = collect_symbols(&root, &ctx);
        let def_bindings = definition_bindings(&defs);
        let mut symbols = defs;

        // CTE symbols: each `WITH name AS (…)` introduces a name local to its
        // enclosing statement scope — NOT the file root.
        let cte_symbols = collect_cte_symbols(&root, &ctx);
        symbols.extend(cte_symbols.iter().cloned());

        // Module symbol: stable SCIP identity for the whole file.
        symbols.push(super::module_symbol(Language::Sql, &[], file, source.len()));

        // References (all object_reference nodes that aren't DDL definition names).
        let mut references = collect_references(&root, ctx.bytes, ctx.file);

        // Scope tree: scope[0] = Module over whole file; each `statement` or
        // `subquery` node gets its own `Other` scope.
        let scopes = collect_scopes(&root, source.len());
        attach_reference_scopes(&mut references, &scopes);

        // CTE bindings (Definition in statement scope) + DDL bindings (scope 0).
        let mut bindings = collect_cte_bindings(&root, ctx.bytes, &scopes, &cte_symbols);
        bindings.extend(def_bindings);

        Ok(FileFacts {
            file: file.to_owned(),
            lang: Language::Sql.as_str().to_owned(),
            symbols,
            references,
            scopes,
            bindings,
            ffi_exports: Vec::new(),
        })
    }
}

// ── Symbol extraction ─────────────────────────────────────────────────────────

/// Walk the tree recursively and collect DDL symbols (tables, views, columns).
///
/// SQL has no path-namespace derived from the file path — the optional schema
/// prefix comes from the SQL itself and is captured directly.
fn collect_symbols(root: &Node, ctx: &ExtractCtx) -> Vec<Symbol> {
    let mut out = Vec::new();
    collect_symbols_recursive(root, ctx, &mut out);
    out
}

fn collect_symbols_recursive(node: &Node, ctx: &ExtractCtx, out: &mut Vec<Symbol>) {
    match node.kind() {
        "create_table" => {
            extract_table(node, ctx, out);
        }
        "create_view" | "create_materialized_view" => {
            extract_view(node, ctx, out);
        }
        _ => {
            for child in node.children(&mut node.walk()) {
                collect_symbols_recursive(&child, ctx, out);
            }
        }
    }
}

/// Extract the unquoted name and optional unquoted schema from the first
/// `object_reference` child of `node`. Returns `None` if the node has no
/// `object_reference` child or that child has no `name` field.
fn object_name_and_schema<'a>(
    node: &'a Node<'a>,
    bytes: &[u8],
) -> Option<(String, Option<String>)> {
    let obj_ref = first_object_reference(node)?;
    let name_node = obj_ref.child_by_field_name("name")?;
    let name = super::unquote(super::node_text(&name_node, bytes)).to_owned();
    let schema = obj_ref
        .child_by_field_name("schema")
        .map(|n| super::unquote(super::node_text(&n, bytes)).to_owned());
    Some((name, schema))
}

/// Extract the table name (and optional schema) from the first `object_reference`
/// child of a `create_table` node.
fn extract_table(node: &Node, ctx: &ExtractCtx, out: &mut Vec<Symbol>) {
    let Some((table_name, schema)) = object_name_and_schema(node, ctx.bytes) else {
        return;
    };

    // Build the table symbol.
    let table_descriptors = build_descriptors(schema.as_deref(), &table_name, None);
    out.push(make_symbol(
        ctx,
        node,
        table_name.clone(),
        SymbolKind::Table,
        Visibility::Public,
        table_descriptors,
        super::one_line_signature(super::node_text(node, ctx.bytes), &['(']),
    ));

    // Extract columns from the `column_definitions` child (absent for CTAS).
    let Some(col_defs) = node
        .children(&mut node.walk())
        .find(|c| c.kind() == "column_definitions")
    else {
        return;
    };

    for col_child in col_defs.children(&mut col_defs.walk()) {
        if col_child.kind() != "column_definition" {
            continue;
        }
        let Some(col_name_node) = col_child.child_by_field_name("name") else {
            continue;
        };
        let raw_col = super::node_text(&col_name_node, ctx.bytes);
        let col_name = super::unquote(raw_col).to_owned();

        let col_descriptors = build_descriptors(schema.as_deref(), &table_name, Some(&col_name));
        out.push(make_symbol(
            ctx,
            &col_child,
            col_name,
            SymbolKind::Column,
            Visibility::Public,
            col_descriptors,
            super::one_line_signature(super::node_text(&col_child, ctx.bytes), &['(']),
        ));
    }
}

/// Extract the view name (and optional schema) from the first `object_reference`
/// child of a `create_view` or `create_materialized_view` node.
fn extract_view(node: &Node, ctx: &ExtractCtx, out: &mut Vec<Symbol>) {
    let Some((view_name, schema)) = object_name_and_schema(node, ctx.bytes) else {
        return;
    };

    let view_descriptors = build_descriptors(schema.as_deref(), &view_name, None);
    out.push(make_symbol(
        ctx,
        node,
        view_name,
        SymbolKind::View,
        Visibility::Public,
        view_descriptors,
        super::one_line_signature(super::node_text(node, ctx.bytes), &['(']),
    ));
}

/// Find the first child of `node` with kind `object_reference`.
fn first_object_reference<'a>(node: &'a Node<'a>) -> Option<Node<'a>> {
    node.children(&mut node.walk())
        .find(|c| c.kind() == "object_reference")
}

/// Build the descriptor vec for a table/view (column = None) or column.
///
/// Schema namespace is prepended only when present. Columns use `Descriptor::Term`
/// as the leaf; tables/views use `Descriptor::Type`.
fn build_descriptors(schema: Option<&str>, table: &str, column: Option<&str>) -> Vec<Descriptor> {
    let mut descriptors = Vec::new();
    if let Some(s) = schema {
        descriptors.push(Descriptor::Namespace(s.to_owned()));
    }
    descriptors.push(Descriptor::Type(table.to_owned()));
    if let Some(col) = column {
        descriptors.push(Descriptor::Term(col.to_owned()));
    }
    descriptors
}

// ── Reference extraction ──────────────────────────────────────────────────────

/// Walk the tree and collect [`RefRole::TypeRef`] references for every
/// `object_reference` node that is NOT the definition name of a
/// `create_table` / `create_view` / `create_materialized_view` statement.
///
/// The rule is: a direct child `object_reference` of one of those three
/// parent kinds names the object being *created* (already a Symbol); every
/// other `object_reference` in the tree is a use-site — FROM/JOIN/INSERT INTO /
/// UPDATE / DELETE FROM / foreign-key REFERENCES / subquery names, etc.
///
/// v1 boundary: some constructs we don't yet extract as symbols (e.g.
/// `CREATE TRIGGER`, `CREATE INDEX`) also produce `object_reference` nodes for
/// their own names, which we'll emit as refs here. They simply resolve to
/// nothing (no matching symbol) — a harmless no-op until those symbol kinds are
/// added.
fn collect_references(root: &Node, bytes: &[u8], file: &str) -> Vec<Reference> {
    let mut out = Vec::new();
    collect_references_recursive(root, bytes, file, &mut out);
    out
}

fn collect_references_recursive(node: &Node, bytes: &[u8], file: &str, out: &mut Vec<Reference>) {
    if node.kind() == "object_reference" {
        // Determine whether this is the definition name: a direct child of
        // create_table / create_view / create_materialized_view.
        let is_definition_name = node
            .parent()
            .map(|p| {
                matches!(
                    p.kind(),
                    "create_table" | "create_view" | "create_materialized_view"
                )
            })
            .unwrap_or(false);

        if !is_definition_name {
            // Emit a TypeRef reference for this use-site.
            if let Some(name_node) = node.child_by_field_name("name") {
                let name = super::unquote(super::node_text(&name_node, bytes)).to_owned();
                if !name.is_empty() {
                    let qualifier = node
                        .child_by_field_name("schema")
                        .map(|n| super::unquote(super::node_text(&n, bytes)).to_owned());
                    out.push(Reference {
                        name,
                        occ: super::node_occurrence(node, file),
                        role: RefRole::TypeRef,
                        source_module: None,
                        from_path: None,
                        qualifier,
                        scope: None,
                        type_ref_ctx: None,
                    });
                }
            }
        }
        // Recurse into children of this object_reference as well (nested
        // object_references can appear in subqueries).
    }
    for child in node.children(&mut node.walk()) {
        collect_references_recursive(&child, bytes, file, out);
    }
}

// ── Tier-B: scope tree ────────────────────────────────────────────────────────

/// Build the lexical scope tree for one SQL file.
///
/// `scopes[0]` is always the file-root `Module` scope spanning `[0, source_len)`.
/// Each `statement` or `subquery` node opens a `ScopeKind::Other` scope; other
/// constructs do not introduce new name-resolution regions.
fn collect_scopes(root: &Node, source_len: usize) -> Vec<Scope> {
    let mut scopes = Vec::new();
    push_scope(
        &mut scopes,
        None,
        ByteSpan {
            start: 0,
            end: source_len,
        },
        ScopeKind::Module,
    );
    // Walk the root's children (the top-level statements in the program node).
    for child in root.children(&mut root.walk()) {
        scope_dfs(&child, 0, &mut scopes);
    }
    scopes
}

/// DFS that opens `Other` scopes for `statement` and `subquery` nodes.
fn scope_dfs(node: &Node, parent: ScopeId, scopes: &mut Vec<Scope>) {
    match node.kind() {
        "statement" | "subquery" => {
            let new_id = push_scope(scopes, Some(parent), node_span(node), ScopeKind::Other);
            for child in node.children(&mut node.walk()) {
                scope_dfs(&child, new_id, scopes);
            }
        }
        _ => {
            for child in node.children(&mut node.walk()) {
                scope_dfs(&child, parent, scopes);
            }
        }
    }
}

// ── Tier-B: CTE symbol extraction ────────────────────────────────────────────

/// Return the first `identifier` child of a `cte` node (the CTE name node).
///
/// Both CTE symbol and CTE binding collection need to locate this same node;
/// this helper avoids duplicating the traversal.
#[inline]
fn cte_identifier_node<'a>(cte_node: &Node<'a>) -> Option<Node<'a>> {
    cte_node
        .children(&mut cte_node.walk())
        .find(|c| c.kind() == "identifier")
}

/// Collect a [`Symbol`] for every CTE name introduced by `WITH … AS (…)`.
///
/// The CTE name is the first `identifier` child of a `cte` node. The symbol
/// uses the same [`Descriptor::Type`] leaf as DDL table/view definitions (no
/// schema prefix — CTEs are always local to their statement scope).
fn collect_cte_symbols(root: &Node, ctx: &ExtractCtx) -> Vec<Symbol> {
    let mut out = Vec::new();
    collect_cte_symbols_dfs(root, ctx, &mut out);
    out
}

fn collect_cte_symbols_dfs(node: &Node, ctx: &ExtractCtx, out: &mut Vec<Symbol>) {
    if node.kind() == "cte" {
        // The first child of kind `identifier` is the CTE name.
        if let Some(name_node) = cte_identifier_node(node) {
            let name = super::unquote(super::node_text(&name_node, ctx.bytes)).to_owned();
            if !name.is_empty() {
                // Mirror DDL SymbolId style: Descriptor::Type as the single leaf.
                let descriptors = vec![Descriptor::Type(name.clone())];
                out.push(make_symbol(
                    ctx,
                    node,
                    name,
                    SymbolKind::Other,
                    Visibility::Public,
                    descriptors,
                    String::new(),
                ));
            }
        }
        // Do NOT recurse into `cte` children to avoid re-entering nested CTEs
        // from within this one's body statement — those will be visited by the
        // outer DFS as siblings of the parent `statement`.
        return;
    }
    for child in node.children(&mut node.walk()) {
        collect_cte_symbols_dfs(&child, ctx, out);
    }
}

// ── Tier-B: CTE binding collection ───────────────────────────────────────────

/// Collect [`BindingKind::Definition`] bindings for each CTE name.
///
/// Each binding lives in the innermost scope containing the CTE's name node —
/// that is the `statement` or `subquery` scope opened by the enclosing WITH
/// clause, never scope 0 (the file root). This is what enables the scope-graph
/// resolver to prefer the CTE definition over any global table of the same name.
fn collect_cte_bindings(
    root: &Node,
    bytes: &[u8],
    scopes: &[Scope],
    cte_symbols: &[Symbol],
) -> Vec<Binding> {
    // Index CTE symbols by name for O(1) lookup.
    let by_name: HashMap<&str, &Symbol> =
        cte_symbols.iter().map(|s| (s.name.as_str(), s)).collect();
    let mut out = Vec::new();
    collect_cte_bindings_dfs(root, bytes, scopes, &by_name, &mut out);
    out
}

fn collect_cte_bindings_dfs<'a>(
    node: &Node,
    bytes: &[u8],
    scopes: &[Scope],
    by_name: &HashMap<&'a str, &'a Symbol>,
    out: &mut Vec<Binding>,
) {
    if node.kind() == "cte" {
        if let Some(name_node) = cte_identifier_node(node) {
            let name = super::unquote(super::node_text(&name_node, bytes));
            if let Some(sym) = by_name.get(name) {
                let scope = innermost_scope(name_node.start_byte(), scopes).unwrap_or(0);
                out.push(Binding {
                    scope,
                    name: name.to_owned(),
                    intro: name_node.start_byte(),
                    kind: BindingKind::Definition,
                    target: BindingTarget::Def(sym.id.clone()),
                });
            }
        }
        // Do not recurse further — same rationale as collect_cte_symbols_dfs.
        return;
    }
    for child in node.children(&mut node.walk()) {
        collect_cte_bindings_dfs(&child, bytes, scopes, by_name, out);
    }
}

// ── Tests ─────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;
    use crate::extract::extract_path;

    fn scip(sym: &Symbol) -> String {
        sym.id.to_scip_string()
    }

    fn find_by_name<'a>(symbols: &'a [Symbol], name: &str) -> Option<&'a Symbol> {
        symbols.iter().find(|s| s.name == name)
    }

    // ── Module symbol still present ───────────────────────────────────────────

    #[test]
    fn sql_stub_parses_and_emits_module_symbol() {
        let src = "CREATE TABLE users (id INT, name TEXT);";
        let facts = SqlExtractor.extract(src, "db/schema.sql").unwrap();

        assert_eq!(facts.lang, "sql");
        assert!(
            !facts.symbols.is_empty(),
            "expected at least the module symbol, got {:?}",
            facts.symbols
        );
        let mod_sym = facts.symbols.iter().find(|s| s.name == "schema").unwrap();
        assert!(
            mod_sym.id.to_scip_string().contains("schema"),
            "module symbol SCIP string should contain the file stem; got: {}",
            mod_sym.id.to_scip_string()
        );
        // A pure CREATE TABLE with no use-sites emits no references.
        assert!(
            facts
                .references
                .iter()
                .all(|r| r.role != RefRole::TypeRef || r.name != "users"),
            "pure DDL should not emit a TypeRef reference for the table being created"
        );
    }

    #[test]
    fn dispatch_routes_sql_extension() {
        let src = "CREATE TABLE orders (id INT);";
        let facts = extract_path("db/orders.sql", src).unwrap();
        assert_eq!(facts.lang, "sql");
    }

    // ── Basic table + columns ─────────────────────────────────────────────────

    #[test]
    fn create_table_emits_table_and_column_symbols() {
        let src = "CREATE TABLE users (id INT, email TEXT);";
        let facts = SqlExtractor.extract(src, "db/schema.sql").unwrap();

        let table = find_by_name(&facts.symbols, "users").expect("expected 'users' table symbol");
        assert_eq!(table.kind, SymbolKind::Table);
        assert!(
            scip(table).ends_with("users#"),
            "table SCIP should end with 'users#', got: {}",
            scip(table)
        );

        let id_col = find_by_name(&facts.symbols, "id").expect("expected 'id' column symbol");
        assert_eq!(id_col.kind, SymbolKind::Column);
        assert!(
            scip(id_col).ends_with("users#id."),
            "id column SCIP should end with 'users#id.', got: {}",
            scip(id_col)
        );

        let email_col =
            find_by_name(&facts.symbols, "email").expect("expected 'email' column symbol");
        assert_eq!(email_col.kind, SymbolKind::Column);
        assert!(
            scip(email_col).ends_with("users#email."),
            "email column SCIP should end with 'users#email.', got: {}",
            scip(email_col)
        );
    }

    // ── Schema-qualified table ────────────────────────────────────────────────

    #[test]
    fn schema_qualified_table_and_column() {
        let src = "CREATE TABLE app.users (id INT);";
        let facts = SqlExtractor.extract(src, "db/schema.sql").unwrap();

        let table = find_by_name(&facts.symbols, "users").expect("expected 'users' symbol");
        assert_eq!(table.kind, SymbolKind::Table);
        assert!(
            scip(table).ends_with("app/users#"),
            "table SCIP should end with 'app/users#', got: {}",
            scip(table)
        );

        let id_col = find_by_name(&facts.symbols, "id").expect("expected 'id' column symbol");
        assert_eq!(id_col.kind, SymbolKind::Column);
        assert!(
            scip(id_col).ends_with("app/users#id."),
            "id column SCIP should end with 'app/users#id.', got: {}",
            scip(id_col)
        );
    }

    // ── View ─────────────────────────────────────────────────────────────────

    #[test]
    fn create_view_emits_view_symbol_no_columns() {
        let src = "CREATE VIEW active_users AS SELECT * FROM users;";
        let facts = SqlExtractor.extract(src, "db/schema.sql").unwrap();

        let view =
            find_by_name(&facts.symbols, "active_users").expect("expected 'active_users' symbol");
        assert_eq!(view.kind, SymbolKind::View);
        assert!(
            scip(view).ends_with("active_users#"),
            "view SCIP should end with 'active_users#', got: {}",
            scip(view)
        );

        // No column symbols from a view (no column_definitions).
        let col_count = facts
            .symbols
            .iter()
            .filter(|s| s.kind == SymbolKind::Column)
            .count();
        assert_eq!(col_count, 0, "views should produce no column symbols");
    }

    // ── Quoted identifiers ────────────────────────────────────────────────────

    #[test]
    fn double_quoted_table_name_strips_quotes() {
        let src = r#"CREATE TABLE "my table" (id INT);"#;
        let facts = SqlExtractor.extract(src, "db/schema.sql").unwrap();

        let table = find_by_name(&facts.symbols, "my table")
            .expect("expected 'my table' symbol (unquoted)");
        assert_eq!(table.kind, SymbolKind::Table);
        assert!(
            scip(table).contains("my table"),
            "SCIP should contain the bare name 'my table', got: {}",
            scip(table)
        );
    }

    // ── CTAS guard (no column_definitions) ───────────────────────────────────

    #[test]
    fn ctas_does_not_panic_and_emits_table_symbol_only() {
        // CREATE TABLE ... AS SELECT has no column_definitions child.
        let src = "CREATE TABLE summary AS SELECT id, name FROM users;";
        let facts = SqlExtractor.extract(src, "db/schema.sql").unwrap();

        // Should not panic; table symbol should still appear.
        let table =
            find_by_name(&facts.symbols, "summary").expect("expected 'summary' table symbol");
        assert_eq!(table.kind, SymbolKind::Table);

        // No column symbols because there is no column_definitions node.
        let col_count = facts
            .symbols
            .iter()
            .filter(|s| s.kind == SymbolKind::Column)
            .count();
        assert_eq!(col_count, 0, "CTAS table should produce no column symbols");
    }

    // ── Robustness / empty / malformed ────────────────────────────────────────

    #[test]
    fn empty_sql_does_not_panic_and_returns_module_symbol() {
        let facts = SqlExtractor.extract("", "db/empty.sql").unwrap();
        // At minimum the module symbol must be present.
        assert!(
            facts.symbols.iter().any(|s| s.kind == SymbolKind::Module),
            "empty SQL should still produce the module symbol"
        );
        // No DDL symbols at all.
        assert!(
            !facts.symbols.iter().any(|s| matches!(
                s.kind,
                SymbolKind::Table | SymbolKind::View | SymbolKind::Column
            )),
            "empty SQL should produce no DDL symbols"
        );
    }

    #[test]
    fn malformed_sql_does_not_panic() {
        let facts = SqlExtractor
            .extract("THIS IS NOT VALID SQL !!!", "db/bad.sql")
            .unwrap();
        // Must not panic; module symbol must be present.
        assert!(
            facts.symbols.iter().any(|s| s.kind == SymbolKind::Module),
            "malformed SQL should still return Ok with the module symbol"
        );
    }

    // ── Reference extraction tests ────────────────────────────────────────────

    /// `SELECT * FROM users` → one TypeRef reference named `users`, no qualifier.
    #[test]
    fn select_from_emits_typeref_reference() {
        let src = "SELECT * FROM users;";
        let facts = SqlExtractor.extract(src, "db/query.sql").unwrap();
        let refs: Vec<_> = facts
            .references
            .iter()
            .filter(|r| r.role == RefRole::TypeRef && r.name == "users")
            .collect();
        assert_eq!(
            refs.len(),
            1,
            "expected exactly one TypeRef ref named 'users', got: {:?}",
            facts.references
        );
        assert_eq!(
            refs[0].qualifier, None,
            "unqualified ref should have no qualifier"
        );
    }

    /// `SELECT * FROM app.users` → TypeRef ref named `users`, qualifier `Some("app")`.
    #[test]
    fn select_from_schema_qualified_emits_qualifier() {
        let src = "SELECT * FROM app.users;";
        let facts = SqlExtractor.extract(src, "db/query.sql").unwrap();
        let refs: Vec<_> = facts
            .references
            .iter()
            .filter(|r| r.role == RefRole::TypeRef && r.name == "users")
            .collect();
        assert_eq!(
            refs.len(),
            1,
            "expected one TypeRef ref named 'users', got: {:?}",
            facts.references
        );
        assert_eq!(
            refs[0].qualifier,
            Some("app".to_owned()),
            "schema-qualified ref should carry qualifier 'app'"
        );
    }

    /// JOIN emits a TypeRef reference for the joined table.
    #[test]
    fn join_emits_typeref_reference() {
        let src = "SELECT * FROM orders JOIN users ON orders.user_id = users.id;";
        let facts = SqlExtractor.extract(src, "db/query.sql").unwrap();
        let users_refs: Vec<_> = facts
            .references
            .iter()
            .filter(|r| r.role == RefRole::TypeRef && r.name == "users")
            .collect();
        assert!(
            !users_refs.is_empty(),
            "expected at least one TypeRef ref for 'users' (JOIN target), got: {:?}",
            facts.references
        );
    }

    /// Foreign-key REFERENCES clause emits a TypeRef reference for the referenced table.
    #[test]
    fn foreign_key_references_emits_typeref() {
        let src = "CREATE TABLE orders (id INT, user_id INT REFERENCES users(id));";
        let facts = SqlExtractor.extract(src, "db/schema.sql").unwrap();
        let fk_refs: Vec<_> = facts
            .references
            .iter()
            .filter(|r| r.role == RefRole::TypeRef && r.name == "users")
            .collect();
        assert_eq!(
            fk_refs.len(),
            1,
            "expected one TypeRef ref for FK REFERENCES 'users', got: {:?}",
            facts.references
        );
    }

    /// Pure DDL: `CREATE TABLE users (id INT)` alone emits NO TypeRef reference
    /// for `users` — the definition name is skipped.
    #[test]
    fn pure_ddl_no_typeref_for_definition_name() {
        let src = "CREATE TABLE users (id INT);";
        let facts = SqlExtractor.extract(src, "db/schema.sql").unwrap();
        let typeref_users: Vec<_> = facts
            .references
            .iter()
            .filter(|r| r.role == RefRole::TypeRef && r.name == "users")
            .collect();
        assert!(
            typeref_users.is_empty(),
            "pure DDL CREATE TABLE should NOT emit a TypeRef ref for 'users' (it's the definition name), \
             got: {:?}",
            typeref_users
        );
    }

    // ── Tier-B scope / CTE tests ──────────────────────────────────────────────

    /// A WITH statement opens at least one non-module (Other) scope.
    #[test]
    fn cte_statement_opens_other_scope() {
        let src = "WITH r AS (SELECT 1) SELECT * FROM r;";
        let facts = SqlExtractor.extract(src, "db/query.sql").unwrap();
        // scope[0] = Module; at least one more scope with kind Other.
        assert!(
            facts.scopes.len() >= 2,
            "expected at least two scopes (Module + statement), got: {:?}",
            facts.scopes
        );
        let has_other = facts.scopes.iter().any(|s| s.kind == ScopeKind::Other);
        assert!(has_other, "expected at least one ScopeKind::Other scope");
        // Every Other scope has Some(parent) leading back toward 0.
        for scope in &facts.scopes {
            if scope.kind == ScopeKind::Other {
                assert!(
                    scope.parent.is_some(),
                    "Other scope should have a parent, got: {:?}",
                    scope
                );
            }
        }
    }

    /// CTE name → Definition binding in a non-zero Other scope.
    #[test]
    fn cte_name_gets_definition_binding_in_statement_scope() {
        let src = "WITH revenue AS (SELECT amount FROM sales) SELECT * FROM revenue;";
        let facts = SqlExtractor.extract(src, "db/query.sql").unwrap();

        let binding = facts
            .bindings
            .iter()
            .find(|b| b.name == "revenue" && b.kind == BindingKind::Definition);
        let binding = binding.expect("expected a Definition binding for 'revenue'");

        // Binding must be in a non-zero scope (not the file root).
        assert_ne!(
            binding.scope, 0,
            "CTE binding should be in a statement scope, not the file root (scope 0)"
        );
        // The scope it lives in must be ScopeKind::Other.
        assert_eq!(
            facts.scopes[binding.scope].kind,
            ScopeKind::Other,
            "CTE binding scope should be ScopeKind::Other, got: {:?}",
            facts.scopes[binding.scope]
        );
    }

    /// The `FROM revenue` reference inside a CTE statement has its scope set.
    #[test]
    fn cte_from_ref_has_scope_set() {
        let src = "WITH revenue AS (SELECT amount FROM sales) SELECT * FROM revenue;";
        let facts = SqlExtractor.extract(src, "db/query.sql").unwrap();

        let revenue_ref = facts
            .references
            .iter()
            .find(|r| r.role == RefRole::TypeRef && r.name == "revenue");
        let revenue_ref =
            revenue_ref.expect("expected a TypeRef reference named 'revenue' (FROM revenue)");
        assert!(
            revenue_ref.scope.is_some(),
            "FROM revenue reference should have scope set, got None"
        );
    }

    /// CTE definition emits a Symbol with kind Other and the correct name/line.
    #[test]
    fn cte_emits_symbol() {
        let src = "WITH revenue AS (SELECT amount FROM sales) SELECT * FROM revenue;";
        let facts = SqlExtractor.extract(src, "db/query.sql").unwrap();

        let sym = find_by_name(&facts.symbols, "revenue")
            .expect("expected a Symbol named 'revenue' for the CTE definition");
        assert_eq!(
            sym.kind,
            SymbolKind::Other,
            "CTE symbol kind should be Other"
        );
        assert_eq!(sym.line, 1, "CTE symbol should be on line 1");
    }

    /// A plain `SELECT * FROM users` (no CTE) still produces scopes and the
    /// `users` reference has its scope set.
    #[test]
    fn plain_select_scopes_and_ref_scope() {
        let src = "SELECT * FROM users;";
        let facts = SqlExtractor.extract(src, "db/query.sql").unwrap();

        assert!(
            !facts.scopes.is_empty(),
            "plain SELECT should still produce scopes"
        );
        let users_ref = facts
            .references
            .iter()
            .find(|r| r.role == RefRole::TypeRef && r.name == "users")
            .expect("expected TypeRef ref for 'users'");
        assert!(
            users_ref.scope.is_some(),
            "plain SELECT FROM ref should have scope set"
        );
    }

    /// DDL `CREATE TABLE orders (id INT)` still gets a Definition binding at scope 0.
    #[test]
    fn ddl_gets_definition_binding_at_scope_0() {
        let src = "CREATE TABLE orders (id INT);";
        let facts = SqlExtractor.extract(src, "db/schema.sql").unwrap();

        let binding = facts
            .bindings
            .iter()
            .find(|b| b.name == "orders" && b.kind == BindingKind::Definition);
        let binding = binding.expect("expected a Definition binding for 'orders'");
        assert_eq!(
            binding.scope, 0,
            "DDL Definition binding should be at file-root scope (0)"
        );
    }
}