// surql_parser — schema_graph.rs
//! Schema graph — semantic model built from SurrealQL definitions.
//!
//! Provides a queryable graph of tables, fields, indexes, functions, and their
//! relationships. Built from parsed `.surql` files.
//!
//! # Example
//!
//! ```
//! use surql_parser::SchemaGraph;
//!
//! let schema = SchemaGraph::from_source("
//!     DEFINE TABLE user SCHEMAFULL;
//!     DEFINE FIELD name ON user TYPE string;
//!     DEFINE FIELD age ON user TYPE int;
//!     DEFINE INDEX email_idx ON user FIELDS email UNIQUE;
//!     DEFINE FUNCTION fn::greet($name: string) -> string { RETURN 'Hello, ' + $name; };
//! ").unwrap();
//!
//! assert_eq!(schema.table_names().count(), 1);
//! assert_eq!(schema.fields_of("user").len(), 2);
//! assert_eq!(schema.indexes_of("user").len(), 1);
//! assert!(schema.function("greet").is_some());
//! ```

25use std::collections::{HashMap, HashSet, VecDeque};
26use std::path::Path;
27use std::sync::Arc;
28
29use surrealdb_types::{SqlFormat, ToSql};
30
/// Source location of a definition in a `.surql` file.
///
/// Byte-offset based, as produced by [`SchemaGraph::attach_source_locations`].
#[derive(Debug, Clone)]
pub struct SourceLocation {
	/// File the definition lives in (shared `Arc` across many locations).
	pub file: Arc<Path>,
	/// Byte offset of the start of the `DEFINE` statement.
	pub offset: usize,
	/// Byte length of the definition header (from `DEFINE` through the name).
	pub len: usize,
}

/// A parsed table definition.
#[derive(Debug, Clone)]
pub struct TableDef {
	/// Table name.
	pub name: String,
	/// `true` for `SCHEMAFULL` tables, `false` for `SCHEMALESS`.
	pub full: bool,
	/// Debug-formatted table type as parsed from the definition.
	pub table_type: String,
	/// Optional `COMMENT` text attached to the definition.
	pub comment: Option<String>,
	/// Fields defined on this table (first definition per name wins).
	pub fields: Vec<FieldDef>,
	/// Indexes defined on this table.
	pub indexes: Vec<IndexDef>,
	/// Events defined on this table.
	pub events: Vec<EventDef>,
	/// Where the `DEFINE TABLE` statement was found, if known.
	pub source: Option<SourceLocation>,
	/// Namespace active when the table was defined (`None` = default scope).
	pub ns: Option<String>,
	/// Database active when the table was defined (`None` = default scope).
	pub db: Option<String>,
}

/// A parsed field definition.
#[derive(Debug, Clone)]
pub struct FieldDef {
	/// Field name.
	pub name: String,
	/// SQL-formatted field type, if a type was declared.
	pub kind: Option<String>,
	/// Table names this field links to via `record<...>` in its type.
	pub record_links: Vec<String>,
	/// SQL-formatted `DEFAULT` expression, if any.
	pub default: Option<String>,
	/// `true` if the field is declared `READONLY`.
	pub readonly: bool,
	/// Optional `COMMENT` text attached to the definition.
	pub comment: Option<String>,
	/// Where the `DEFINE FIELD` statement was found, if known.
	pub source: Option<SourceLocation>,
}

/// A parsed index definition.
#[derive(Debug, Clone)]
pub struct IndexDef {
	/// Index name.
	pub name: String,
	/// Columns / fields the index covers, in declaration order.
	pub columns: Vec<String>,
	/// `true` if the index is declared `UNIQUE`.
	pub unique: bool,
	/// Optional `COMMENT` text attached to the definition.
	pub comment: Option<String>,
	/// Where the `DEFINE INDEX` statement was found, if known.
	pub source: Option<SourceLocation>,
}

/// A parsed event definition.
#[derive(Debug, Clone)]
pub struct EventDef {
	/// Event name.
	pub name: String,
	/// Optional `COMMENT` text attached to the definition.
	pub comment: Option<String>,
	/// Where the `DEFINE EVENT` statement was found, if known.
	pub source: Option<SourceLocation>,
}

/// A parsed function definition.
#[derive(Debug, Clone)]
pub struct FunctionDef {
	/// Function name without the `fn::` prefix.
	pub name: String,
	/// `(parameter_name, sql_type)` pairs in declaration order.
	pub args: Vec<(String, String)>,
	/// SQL-formatted return type, if declared.
	pub returns: Option<String>,
	/// Optional `COMMENT` text attached to the definition.
	pub comment: Option<String>,
	/// Where the `DEFINE FUNCTION` statement was found, if known.
	pub source: Option<SourceLocation>,
}

/// A parsed param definition.
#[derive(Debug, Clone)]
pub struct ParamDef {
	/// Param name as parsed from the definition.
	pub name: String,
	/// Where the `DEFINE PARAM` statement was found, if known.
	pub source: Option<SourceLocation>,
}

/// A node in a dependency tree, representing a table linked via `record<>` fields.
///
/// Used by [`SchemaGraph::dependency_tree`] to build a nested view of how tables
/// are connected through record links. Cycle nodes are emitted as leaves:
/// `is_cycle` is set and `children` is empty.
#[derive(Debug, Clone)]
pub struct DependencyNode {
	/// Table name at this node.
	pub table: String,
	/// The field on the *parent* node that links to this table (`None` for the root).
	pub field: Option<String>,
	/// Child nodes — tables linked from this node's `record<>` fields.
	pub children: Vec<DependencyNode>,
	/// `true` if this table was already visited in an ancestor node (cycle detected).
	pub is_cycle: bool,
}

/// A semantic graph of SurrealQL schema definitions.
///
/// Built from `.surql` files, provides fast lookups for tables, fields,
/// functions, and their relationships.
#[derive(Debug, Clone, Default)]
pub struct SchemaGraph {
	/// All known tables, keyed by table name.
	tables: HashMap<String, TableDef>,
	/// All known functions, keyed by name (without the `fn::` prefix).
	functions: HashMap<String, FunctionDef>,
	/// All known params, keyed by name.
	params: HashMap<String, ParamDef>,
	/// field_name -> [(table_name, field_idx)] for O(1) field lookups.
	/// Must be rebuilt via `rebuild_field_index` whenever `tables` changes.
	field_index: HashMap<String, Vec<(String, usize)>>,
}

impl SchemaGraph {
	/// Build a schema graph directly from a SurrealQL source string.
	pub fn from_source(input: &str) -> crate::Result<Self> {
		crate::extract_definitions(input).map(|defs| Self::from_definitions(&defs))
	}

137	/// Build a schema graph by walking all `.surql` files in a directory.
138	///
139	/// Source locations are tracked per file for go-to-definition support.
140	pub fn from_files(dir: &Path) -> anyhow::Result<Self> {
141		let mut graph = Self::default();
142		Self::collect_files_recursive(dir, &mut graph, 0)?;
143		Ok(graph)
144	}
145
146	/// Build per-file schema graphs by walking all `.surql` files in a directory.
147	///
148	/// Returns a map from file path to its individual `SchemaGraph`.
149	/// Used for incremental rebuilding: on save, only the changed file is re-parsed,
150	/// then all per-file graphs are merged.
151	pub fn from_files_per_file(dir: &Path) -> anyhow::Result<HashMap<std::path::PathBuf, Self>> {
152		let mut per_file = HashMap::new();
153		Self::collect_files_per_file_recursive(dir, &mut per_file, 0)?;
154		Ok(per_file)
155	}
156
157	/// Build a schema graph from a single `.surql` file.
158	///
159	/// Returns `None` if the file cannot be read or parsed.
160	pub fn from_single_file(path: &Path) -> Option<Self> {
161		let content = match crate::read_surql_file(path) {
162			Ok(c) => c,
163			Err(e) => {
164				tracing::warn!("{e}");
165				return None;
166			}
167		};
168		let (stmts, _) = crate::parse_with_recovery(&content);
169		match crate::extract_definitions_from_ast(&stmts) {
170			Ok(defs) => {
171				let mut graph = Self::from_definitions(&defs);
172				graph.attach_source_locations(&content, path);
173				Some(graph)
174			}
175			Err(e) => {
176				tracing::warn!("Cannot extract defs from {}: {e}", path.display());
177				None
178			}
179		}
180	}
181
182	fn collect_files_per_file_recursive(
183		dir: &Path,
184		per_file: &mut HashMap<std::path::PathBuf, Self>,
185		depth: u32,
186	) -> anyhow::Result<()> {
187		if depth > 32 {
188			tracing::warn!(
189				"Max directory depth (32) exceeded at {}, skipping",
190				dir.display()
191			);
192			return Ok(());
193		}
194		let mut entries: Vec<_> = std::fs::read_dir(dir)?
195			.filter_map(|e| match e {
196				Ok(entry) => Some(entry),
197				Err(err) => {
198					tracing::warn!("Skipping unreadable entry in {}: {err}", dir.display());
199					None
200				}
201			})
202			.collect();
203		entries.sort_by_key(|e| e.file_name());
204
205		for entry in entries {
206			let path = entry.path();
207			if path
208				.symlink_metadata()
209				.map(|m| m.is_symlink())
210				.unwrap_or(false)
211			{
212				tracing::warn!("Skipping symlink: {}", path.display());
213				continue;
214			}
215			if path.is_dir() {
216				let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
217				if matches!(
218					name,
219					"target"
220						| "node_modules" | ".git"
221						| "build" | "fixtures"
222						| "dist" | ".cache"
223						| "surql-lsp-out"
224				) || name.starts_with('.')
225				{
226					continue;
227				}
228			}
229			if path.is_file() && path.extension().is_some_and(|ext| ext == "surql") {
230				if let Some(graph) = Self::from_single_file(&path) {
231					per_file.insert(path, graph);
232				}
233			} else if path.is_dir() {
234				Self::collect_files_per_file_recursive(&path, per_file, depth + 1)?;
235			}
236		}
237		Ok(())
238	}
239
240	fn collect_files_recursive(dir: &Path, graph: &mut Self, depth: u32) -> anyhow::Result<()> {
241		if depth > 32 {
242			tracing::warn!(
243				"Max directory depth (32) exceeded at {}, skipping",
244				dir.display()
245			);
246			return Ok(());
247		}
248		let mut entries: Vec<_> = std::fs::read_dir(dir)?
249			.filter_map(|e| match e {
250				Ok(entry) => Some(entry),
251				Err(err) => {
252					tracing::warn!("Skipping unreadable entry in {}: {err}", dir.display());
253					None
254				}
255			})
256			.collect();
257		entries.sort_by_key(|e| e.file_name());
258
259		for entry in entries {
260			let path = entry.path();
261			if path
262				.symlink_metadata()
263				.map(|m| m.is_symlink())
264				.unwrap_or(false)
265			{
266				tracing::warn!("Skipping symlink: {}", path.display());
267				continue;
268			}
269			if path.is_dir() {
270				let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
271				if matches!(
272					name,
273					"target"
274						| "node_modules" | ".git"
275						| "build" | "fixtures"
276						| "dist" | ".cache"
277						| "surql-lsp-out"
278				) || name.starts_with('.')
279				{
280					continue;
281				}
282			}
283			if path.is_file() && path.extension().is_some_and(|ext| ext == "surql") {
284				let content = match crate::read_surql_file(&path) {
285					Ok(c) => c,
286					Err(e) => {
287						tracing::warn!("Skipping {e}");
288						continue;
289					}
290				};
291				let (stmts, _) = crate::parse_with_recovery(&content);
292				match crate::extract_definitions_from_ast(&stmts) {
293					Ok(defs) => {
294						let mut file_graph = Self::from_definitions(&defs);
295						file_graph.attach_source_locations(&content, &path);
296						graph.merge(file_graph);
297					}
298					Err(e) => {
299						tracing::warn!("Skipping defs from {}: {e}", path.display());
300					}
301				}
302			} else if path.is_dir() {
303				Self::collect_files_recursive(&path, graph, depth + 1)?;
304			}
305		}
306		Ok(())
307	}
308
	/// Scan source text via the lexer for DEFINE statement positions.
	///
	/// Walks the token stream looking for `DEFINE TABLE|FUNCTION|FIELD`
	/// statements and, for each one matching a definition already in the
	/// graph, records a [`SourceLocation`] spanning from the `DEFINE` keyword
	/// through the defined name. Only the *first* location found wins
	/// (the `source.is_none()` filters), so earlier files keep their site.
	pub fn attach_source_locations(&mut self, source: &str, file: &Path) {
		use crate::upstream::syn::lexer::Lexer;
		use crate::upstream::syn::token::TokenKind;

		let bytes = source.as_bytes();
		// Token spans use u32 offsets; bail out rather than overflow on huge files.
		if bytes.len() > u32::MAX as usize {
			return;
		}

		let tokens: Vec<_> = Lexer::new(bytes).collect();
		// Shared so every SourceLocation holds the path without re-allocating.
		let file: Arc<Path> = Arc::from(file);

		for i in 0..tokens.len() {
			let define_token = &tokens[i];
			if token_text(source, define_token).to_uppercase() != "DEFINE" {
				continue;
			}

			// Skip OVERWRITE/IF NOT EXISTS
			let mut j = i + 1;
			while j < tokens.len() {
				let t = token_text(source, &tokens[j]).to_uppercase();
				if t == "OVERWRITE" || t == "IF" || t == "NOT" || t == "EXISTS" {
					j += 1;
				} else {
					break;
				}
			}
			if j >= tokens.len() {
				continue;
			}

			// `tokens[j]` is now the definition kind: TABLE, FUNCTION, FIELD, ...
			let kind_text = token_text(source, &tokens[j]).to_uppercase();

			if kind_text == "TABLE" && j + 1 < tokens.len() {
				// DEFINE TABLE <name> — span covers DEFINE through the name token.
				let name_token = &tokens[j + 1];
				let name = token_text(source, name_token);
				if let Some(table) = self.tables.get_mut(name).filter(|t| t.source.is_none()) {
					table.source = Some(SourceLocation {
						file: Arc::clone(&file),
						offset: define_token.span.offset as usize,
						len: (name_token.span.offset + name_token.span.len
							- define_token.span.offset) as usize,
					});
				}
			}

			if kind_text == "FUNCTION" && j + 1 < tokens.len() {
				// A function name may span several tokens (e.g. `fn::a::b`), so
				// collect the contiguous run of identifier / `::` / keyword tokens.
				let fn_start = j + 1;
				let mut fn_end = fn_start;
				while fn_end < tokens.len() {
					let tk = tokens[fn_end].kind;
					if tk == TokenKind::Identifier
						|| tk == TokenKind::PathSeperator
						|| matches!(tk, TokenKind::Keyword(_))
					{
						fn_end += 1;
					} else {
						break;
					}
				}
				if fn_end > fn_start {
					let name_start = tokens[fn_start].span.offset as usize;
					let name_end =
						(tokens[fn_end - 1].span.offset + tokens[fn_end - 1].span.len) as usize;
					let full_name = &source[name_start..name_end];
					// Graph keys omit the `fn::` prefix.
					let fn_name = full_name.strip_prefix("fn::").unwrap_or(full_name);
					if let Some(func) = self
						.functions
						.get_mut(fn_name)
						.filter(|f| f.source.is_none())
					{
						func.source = Some(SourceLocation {
							file: Arc::clone(&file),
							offset: define_token.span.offset as usize,
							len: name_end - define_token.span.offset as usize,
						});
					}
				}
			}

			if kind_text == "FIELD" && j + 1 < tokens.len() {
				// DEFINE FIELD <name> ... ON <table> — look a few tokens ahead
				// for the ON clause to resolve which table owns the field.
				let field_name = token_text(source, &tokens[j + 1]);
				for k in (j + 2)..tokens.len().min(j + 6) {
					if token_text(source, &tokens[k]).to_uppercase() == "ON" && k + 1 < tokens.len()
					{
						let table_name = token_text(source, &tokens[k + 1]);
						if let Some(table) = self.tables.get_mut(table_name) {
							for field in &mut table.fields {
								if field.name == field_name && field.source.is_none() {
									field.source = Some(SourceLocation {
										file: Arc::clone(&file),
										offset: define_token.span.offset as usize,
										len: (tokens[(k + 1).min(tokens.len() - 1)].span.offset
											+ tokens[(k + 1).min(tokens.len() - 1)].span.len
											- define_token.span.offset) as usize,
									});
								}
							}
						}
						break;
					}
				}
			}
		}
	}

417	/// Merge another schema graph into this one.
418	///
419	/// Fields, indexes, and events are deduplicated by name.
420	/// SCHEMAFULL wins over SCHEMALESS (once a table is marked SCHEMAFULL, it stays).
421	pub fn merge(&mut self, other: SchemaGraph) {
422		for (name, table) in other.tables {
423			if let Some(existing) = self.tables.get_mut(&name) {
424				if table.full {
425					existing.full = true;
426				}
427				let existing_field_names: HashSet<String> =
428					existing.fields.iter().map(|f| f.name.clone()).collect();
429				for field in table.fields {
430					if !existing_field_names.contains(&field.name) {
431						existing.fields.push(field);
432					}
433				}
434				let existing_index_names: HashSet<String> =
435					existing.indexes.iter().map(|i| i.name.clone()).collect();
436				for index in table.indexes {
437					if !existing_index_names.contains(&index.name) {
438						existing.indexes.push(index);
439					}
440				}
441				let existing_event_names: HashSet<String> =
442					existing.events.iter().map(|e| e.name.clone()).collect();
443				for event in table.events {
444					if !existing_event_names.contains(&event.name) {
445						existing.events.push(event);
446					}
447				}
448				if table.source.is_some() {
449					existing.source = table.source;
450				}
451			} else {
452				self.tables.insert(name, table);
453			}
454		}
455		for (name, func) in other.functions {
456			if let std::collections::hash_map::Entry::Vacant(e) = self.functions.entry(name.clone())
457			{
458				e.insert(func);
459			} else {
460				tracing::warn!("Duplicate function definition: fn::{name} (keeping first)");
461			}
462		}
463		for (name, param) in other.params {
464			if let std::collections::hash_map::Entry::Vacant(e) = self.params.entry(name.clone()) {
465				e.insert(param);
466			} else {
467				tracing::warn!("Duplicate param definition: ${name} (keeping first)");
468			}
469		}
470		self.rebuild_field_index();
471	}
472
	/// Build a schema graph from already-extracted schema definitions.
	///
	/// Tables are inserted first; fields, indexes, and events are then attached
	/// to their owning table. A `DEFINE FIELD` whose table was never defined
	/// creates a placeholder SCHEMALESS table so the field is not lost; indexes
	/// and events on unknown tables are silently dropped.
	pub fn from_definitions(defs: &crate::SchemaDefinitions) -> Self {
		let mut graph = Self::default();

		for t in &defs.tables {
			let name = expr_to_string(&t.name);
			graph.tables.insert(
				name.clone(),
				TableDef {
					name,
					full: t.full,
					// Debug formatting of the parsed table-type value.
					table_type: format!("{:?}", t.table_type),
					comment: extract_comment(&t.comment),
					fields: Vec::new(),
					indexes: Vec::new(),
					events: Vec::new(),
					source: None,
					ns: defs.current_ns.clone(),
					db: defs.current_db.clone(),
				},
			);
		}

		for f in &defs.fields {
			let field_name = expr_to_string(&f.name);
			let table_name = expr_to_string(&f.what);
			// Render the declared type (if any) as single-line SQL.
			let kind_str = f.field_kind.as_ref().map(|k| {
				let mut s = String::new();
				k.fmt_sql(&mut s, SqlFormat::SingleLine);
				s
			});
			// Tables referenced through record<...> in the field type.
			let record_links = f
				.field_kind
				.as_ref()
				.map(extract_record_links)
				.unwrap_or_default();
			// DEFAULT and DEFAULT ALWAYS are both captured as the expression text.
			let default_str = match &f.default {
				crate::upstream::sql::statements::define::DefineDefault::None => None,
				crate::upstream::sql::statements::define::DefineDefault::Always(e)
				| crate::upstream::sql::statements::define::DefineDefault::Set(e) => Some(expr_to_string(e)),
			};

			let def = FieldDef {
				name: field_name,
				kind: kind_str,
				record_links,
				default: default_str,
				readonly: f.readonly,
				comment: extract_comment(&f.comment),
				source: None,
			};

			if let Some(table) = graph.tables.get_mut(&table_name) {
				// First definition of a field name wins.
				if !table.fields.iter().any(|f| f.name == def.name) {
					table.fields.push(def);
				}
			} else {
				// Field on an undeclared table: create a placeholder table for it.
				graph.tables.insert(
					table_name.clone(),
					TableDef {
						name: table_name,
						full: false,
						table_type: "Any".into(),
						comment: None,
						fields: vec![def],
						indexes: Vec::new(),
						events: Vec::new(),
						source: None,
						ns: defs.current_ns.clone(),
						db: defs.current_db.clone(),
					},
				);
			}
		}

		for idx in &defs.indexes {
			let index_name = expr_to_string(&idx.name);
			let table_name = expr_to_string(&idx.what);
			let columns: Vec<String> = idx.cols.iter().map(expr_to_string).collect();
			let unique = matches!(idx.index, crate::upstream::sql::index::Index::Uniq);

			let def = IndexDef {
				name: index_name,
				columns,
				unique,
				comment: extract_comment(&idx.comment),
				source: None,
			};

			// Indexes on unknown tables are dropped.
			if let Some(table) = graph.tables.get_mut(&table_name) {
				table.indexes.push(def);
			}
		}

		for ev in &defs.events {
			let event_name = expr_to_string(&ev.name);
			let table_name = expr_to_string(&ev.target_table);

			let def = EventDef {
				name: event_name,
				comment: extract_comment(&ev.comment),
				source: None,
			};

			// Events on unknown tables are dropped.
			if let Some(table) = graph.tables.get_mut(&table_name) {
				table.events.push(def);
			}
		}

		for func in &defs.functions {
			// Render each argument type as single-line SQL.
			let args: Vec<(String, String)> = func
				.args
				.iter()
				.map(|(name, kind)| {
					let mut kind_str = String::new();
					kind.fmt_sql(&mut kind_str, SqlFormat::SingleLine);
					(name.clone(), kind_str)
				})
				.collect();

			let returns = func.returns.as_ref().map(|k| {
				let mut s = String::new();
				k.fmt_sql(&mut s, SqlFormat::SingleLine);
				s
			});

			graph.functions.insert(
				func.name.clone(),
				FunctionDef {
					name: func.name.clone(),
					args,
					returns,
					comment: extract_comment(&func.comment),
					source: None,
				},
			);
		}

		for p in &defs.params {
			graph.params.insert(
				p.name.clone(),
				ParamDef {
					name: p.name.clone(),
					source: None,
				},
			);
		}

		graph.rebuild_field_index();
		graph
	}

624	fn rebuild_field_index(&mut self) {
625		self.field_index.clear();
626		for (table_name, table) in &self.tables {
627			for (field_idx, field) in table.fields.iter().enumerate() {
628				self.field_index
629					.entry(field.name.clone())
630					.or_default()
631					.push((table_name.clone(), field_idx));
632			}
633		}
634	}
635
636	/// Return a filtered schema containing only tables matching the given NS/DB scope.
637	/// Tables with no NS/DB (None) are included in all scopes (default context).
638	pub fn scoped(&self, ns: Option<&str>, db: Option<&str>) -> Self {
639		let tables: HashMap<String, TableDef> = self
640			.tables
641			.iter()
642			.filter(|(_, t)| scope_matches(&t.ns, &t.db, ns, db))
643			.map(|(k, v)| (k.clone(), v.clone()))
644			.collect();
645		let mut graph = Self {
646			tables,
647			functions: self.functions.clone(),
648			params: self.params.clone(),
649			field_index: HashMap::new(),
650		};
651		graph.rebuild_field_index();
652		graph
653	}
654
655	// ─── Lookups ───
656
657	/// Iterate over all table names in the schema.
658	pub fn table_names(&self) -> impl Iterator<Item = &str> {
659		self.tables.keys().map(|s| s.as_str())
660	}
661
662	/// Get table definition by name.
663	pub fn table(&self, name: &str) -> Option<&TableDef> {
664		self.tables.get(name)
665	}
666
667	/// Fields defined on a table.
668	pub fn fields_of(&self, table: &str) -> &[FieldDef] {
669		self.tables
670			.get(table)
671			.map(|t| t.fields.as_slice())
672			.unwrap_or(&[])
673	}
674
675	/// Indexes defined on a table.
676	pub fn indexes_of(&self, table: &str) -> &[IndexDef] {
677		self.tables
678			.get(table)
679			.map(|t| t.indexes.as_slice())
680			.unwrap_or(&[])
681	}
682
683	/// Events defined on a table.
684	pub fn events_of(&self, table: &str) -> &[EventDef] {
685		self.tables
686			.get(table)
687			.map(|t| t.events.as_slice())
688			.unwrap_or(&[])
689	}
690
691	/// Get function definition by name (without the `fn::` prefix).
692	pub fn function(&self, name: &str) -> Option<&FunctionDef> {
693		self.functions.get(name)
694	}
695
696	/// Iterate over all function names (without `fn::` prefix).
697	pub fn function_names(&self) -> impl Iterator<Item = &str> {
698		self.functions.keys().map(|s| s.as_str())
699	}
700
701	/// Iterate over all defined param names.
702	pub fn param_names(&self) -> impl Iterator<Item = &str> {
703		self.params.keys().map(|s| s.as_str())
704	}
705
706	/// Find a field by name across all tables. Returns (table_name, field_def).
707	///
708	/// Uses a pre-built HashMap index for O(1) lookup by field name,
709	/// instead of scanning all tables.
710	pub fn find_field(&self, field_name: &str) -> Vec<(&str, &FieldDef)> {
711		let Some(entries) = self.field_index.get(field_name) else {
712			return Vec::new();
713		};
714		entries
715			.iter()
716			.filter_map(|(table_name, field_idx)| {
717				let table = self.tables.get(table_name)?;
718				let field = table.fields.get(*field_idx)?;
719				Some((table_name.as_str(), field))
720			})
721			.collect()
722	}
723
724	/// Find a field on a specific table.
725	pub fn field_on(&self, table: &str, field_name: &str) -> Option<&FieldDef> {
726		self.tables
727			.get(table)
728			.and_then(|t| t.fields.iter().find(|f| f.name == field_name))
729	}
730
731	// ─── Graph Traversal ───
732
733	/// Tables reachable from `start` within `max_depth` hops via `record<>` links.
734	///
735	/// Uses BFS with cycle detection. Returns `(table_name, depth, path)` where
736	/// `path` is the chain of `table.field` segments traversed to reach each table.
737	///
738	/// The starting table itself is not included in the results.
739	pub fn tables_reachable_from(
740		&self,
741		start: &str,
742		max_depth: usize,
743	) -> Vec<(String, usize, Vec<String>)> {
744		let mut results = Vec::new();
745		if !self.tables.contains_key(start) {
746			return results;
747		}
748
749		// (current_table, depth, path_so_far)
750		let mut queue: VecDeque<(String, usize, Vec<String>)> = VecDeque::new();
751		let mut visited = HashSet::new();
752		visited.insert(start.to_string());
753		queue.push_back((start.to_string(), 0, Vec::new()));
754
755		while let Some((current, depth, path)) = queue.pop_front() {
756			if depth >= max_depth {
757				continue;
758			}
759			let Some(table) = self.tables.get(&current) else {
760				continue;
761			};
762			for field in &table.fields {
763				for link in &field.record_links {
764					let mut new_path = path.clone();
765					new_path.push(format!("{current}.{}", field.name));
766					if visited.insert(link.clone()) {
767						results.push((link.clone(), depth + 1, new_path.clone()));
768						queue.push_back((link.clone(), depth + 1, new_path));
769					}
770				}
771			}
772		}
773
774		results.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.cmp(&b.0)));
775		results
776	}
777
778	/// Tables that reference `target` via `record<target>` fields (reverse dependencies).
779	///
780	/// Returns `(referencing_table, field_name)` pairs. Useful for answering:
781	/// "What would break if I drop or rename this table?"
782	pub fn tables_referencing(&self, target: &str) -> Vec<(String, String)> {
783		let mut results = Vec::new();
784		let mut table_names: Vec<&str> = self.tables.keys().map(|s| s.as_str()).collect();
785		table_names.sort();
786		for table_name in table_names {
787			let Some(table) = self.tables.get(table_name) else {
788				continue;
789			};
790			for field in &table.fields {
791				if field.record_links.iter().any(|l| l == target) {
792					results.push((table_name.to_string(), field.name.clone()));
793				}
794			}
795		}
796		results
797	}
798
799	/// Tables that share a common `record<>` target with `table` (siblings).
800	///
801	/// Answers: "Which other tables also link to the same targets as this table?"
802	/// Returns `(sibling_table, shared_target, via_field)` triples.
803	pub fn siblings_of(&self, table: &str) -> Vec<(String, String, String)> {
804		let Some(source_table) = self.tables.get(table) else {
805			return Vec::new();
806		};
807
808		let targets: HashSet<&str> = source_table
809			.fields
810			.iter()
811			.flat_map(|f| f.record_links.iter().map(|l| l.as_str()))
812			.collect();
813
814		if targets.is_empty() {
815			return Vec::new();
816		}
817
818		let mut results = Vec::new();
819		let mut other_names: Vec<&str> = self
820			.tables
821			.keys()
822			.filter(|n| n.as_str() != table)
823			.map(|s| s.as_str())
824			.collect();
825		other_names.sort();
826
827		for other_name in other_names {
828			let Some(other_table) = self.tables.get(other_name) else {
829				continue;
830			};
831			for field in &other_table.fields {
832				for link in &field.record_links {
833					if targets.contains(link.as_str()) {
834						results.push((other_name.to_string(), link.clone(), field.name.clone()));
835					}
836				}
837			}
838		}
839		results
840	}
841
842	/// Build a full dependency tree rooted at `root`, following `record<>` links.
843	///
844	/// Cycles are detected: if a table appears as its own ancestor, the node
845	/// is marked with `is_cycle = true` and has no children (the recursion stops).
846	///
847	/// A `max_nodes` safety cap (1000) prevents exponential blowup on diamond-shaped
848	/// graphs where the same table is reachable through many paths.
849	pub fn dependency_tree(&self, root: &str, max_depth: usize) -> DependencyNode {
850		let mut visited = HashSet::new();
851		let mut node_count = 0usize;
852		self.build_dependency_subtree(root, None, max_depth, &mut visited, &mut node_count)
853	}
854
	/// Recursive worker for [`Self::dependency_tree`].
	///
	/// `visited` holds the tables on the *current ancestor path* (entries are
	/// removed on the way back up), so re-entering one marks a cycle instead of
	/// recursing forever. `node_count` is a single counter shared across the
	/// whole tree that caps total size at 1000 nodes.
	fn build_dependency_subtree(
		&self,
		table_name: &str,
		via_field: Option<&str>,
		remaining_depth: usize,
		visited: &mut HashSet<String>,
		node_count: &mut usize,
	) -> DependencyNode {
		// Count every node built, including cycle markers.
		*node_count += 1;

		// Already on the ancestor path: emit a childless leaf flagged as a cycle.
		if !visited.insert(table_name.to_string()) {
			return DependencyNode {
				table: table_name.to_string(),
				field: via_field.map(|s| s.to_string()),
				children: Vec::new(),
				is_cycle: true,
			};
		}

		let mut children = Vec::new();
		// Expand children only while depth remains and the global node cap has
		// not been hit; unknown tables simply get no children.
		if remaining_depth > 0
			&& *node_count < 1000
			&& let Some(table) = self.tables.get(table_name)
		{
			// Flatten (field, link) pairs and sort for deterministic output.
			let mut field_links: Vec<(&str, &str)> = Vec::new();
			for field in &table.fields {
				for link in &field.record_links {
					field_links.push((field.name.as_str(), link.as_str()));
				}
			}
			field_links.sort_by(|a, b| a.0.cmp(b.0).then_with(|| a.1.cmp(b.1)));

			for (field_name, link_target) in field_links {
				// Re-check the cap: recursion below may have consumed it.
				if *node_count >= 1000 {
					break;
				}
				let child = self.build_dependency_subtree(
					link_target,
					Some(field_name),
					remaining_depth - 1,
					visited,
					node_count,
				);
				children.push(child);
			}
		}

		// Pop this table from the ancestor path so sibling branches may revisit it.
		visited.remove(table_name);

		DependencyNode {
			table: table_name.to_string(),
			field: via_field.map(|s| s.to_string()),
			children,
			is_cycle: false,
		}
	}

912	/// Generate a markdown graph tree for all tables in the schema.
913	///
914	/// Builds a dependency tree for each table and formats it as an
915	/// indented markdown tree view. Used by the LSP to cache `graph.md`.
916	pub fn build_graph_tree_markdown(&self) -> String {
917		let mut table_names: Vec<&str> = self.table_names().collect();
918		table_names.sort();
919
920		if table_names.is_empty() {
921			return "# Schema Graph\n\nNo tables defined.\n".to_string();
922		}
923
924		let mut out = String::from("# Schema Graph\n\n");
925
926		// Summary: tables with outgoing links
927		let mut link_count = 0usize;
928		for name in &table_names {
929			if let Some(table) = self.tables.get(*name) {
930				for field in &table.fields {
931					link_count += field.record_links.len();
932				}
933			}
934		}
935		out.push_str(&format!(
936			"**{} tables, {} record links**\n\n",
937			table_names.len(),
938			link_count,
939		));
940
941		// Dependency tree per table (only tables that have outgoing or incoming links)
942		for name in &table_names {
943			let tree = self.dependency_tree(name, 5);
944			if tree.children.is_empty() && self.tables_referencing(name).is_empty() {
945				continue;
946			}
947			out.push_str(&format!("## {name}\n\n"));
948
949			// Forward deps
950			if !tree.children.is_empty() {
951				out.push_str("**Depends on:**\n```\n");
952				for child in &tree.children {
953					format_dependency_node(&mut out, child, 0);
954				}
955				out.push_str("```\n\n");
956			}
957
958			// Reverse deps
959			let refs = self.tables_referencing(name);
960			if !refs.is_empty() {
961				out.push_str("**Referenced by:**\n");
962				for (ref_table, ref_field) in &refs {
963					out.push_str(&format!("- `{ref_table}.{ref_field}`\n"));
964				}
965				out.push('\n');
966			}
967
968			// Siblings
969			let siblings = self.siblings_of(name);
970			if !siblings.is_empty() {
971				out.push_str("**Siblings (shared targets):**\n");
972				let mut seen = HashSet::new();
973				for (sib, target, field) in &siblings {
974					let key = format!("{sib}.{field}->{target}");
975					if seen.insert(key) {
976						out.push_str(&format!(
977							"- `{sib}` (both link to `{target}` via `.{field}`)\n"
978						));
979					}
980				}
981				out.push('\n');
982			}
983		}
984
985		out
986	}
987
	/// Generate markdown documentation from schema definitions.
	///
	/// Includes tables with fields (type, default, comment), indexes, events,
	/// and function signatures with parameters and return types.
	/// Tables and functions are emitted in sorted-name order for stable output.
	pub fn build_docs_markdown(&self) -> String {
		let mut out = String::from("# Schema Documentation\n\n");

		let mut table_names: Vec<&str> = self.table_names().collect();
		table_names.sort();

		if !table_names.is_empty() {
			out.push_str("## Tables\n\n");
			for name in &table_names {
				let table = match self.table(name) {
					Some(t) => t,
					None => continue,
				};
				out.push_str(&format!("### {name}\n\n"));
				if let Some(comment) = &table.comment {
					out.push_str(comment);
					out.push_str("\n\n");
				}

				let schema_label = if table.full {
					"SCHEMAFULL"
				} else {
					"SCHEMALESS"
				};
				out.push_str(&format!("*{schema_label}*\n\n"));

				// Field table: cell text is escaped so it cannot break the row.
				if !table.fields.is_empty() {
					out.push_str("| Field | Type | Default | Comment |\n");
					out.push_str("|-------|------|---------|--------|\n");
					for field in &table.fields {
						let kind = field.kind.as_deref().unwrap_or("");
						let default = field.default.as_deref().unwrap_or("");
						let comment = field.comment.as_deref().unwrap_or("");
						out.push_str(&format!(
							"| {} | {} | {} | {} |\n",
							field.name,
							escape_markdown_table(kind),
							escape_markdown_table(default),
							escape_markdown_table(comment),
						));
					}
					out.push('\n');
				}

				if !table.indexes.is_empty() {
					out.push_str("**Indexes:**\n");
					for idx in &table.indexes {
						let unique_label = if idx.unique { " (UNIQUE)" } else { "" };
						let cols = idx.columns.join(", ");
						out.push_str(&format!("- `{}`{unique_label} on `{cols}`\n", idx.name));
					}
					out.push('\n');
				}

				if !table.events.is_empty() {
					out.push_str("**Events:**\n");
					for ev in &table.events {
						let comment_suffix = ev
							.comment
							.as_ref()
							.map(|c| format!(" -- {c}"))
							.unwrap_or_default();
						out.push_str(&format!("- `{}`{comment_suffix}\n", ev.name));
					}
					out.push('\n');
				}
			}
		}

		let mut fn_names: Vec<&str> = self.function_names().collect();
		fn_names.sort();

		if !fn_names.is_empty() {
			out.push_str("## Functions\n\n");
			for name in &fn_names {
				let func = match self.function(name) {
					Some(f) => f,
					None => continue,
				};
				out.push_str(&format!("### fn::{name}\n\n"));
				if let Some(comment) = &func.comment {
					out.push_str(comment);
					out.push_str("\n\n");
				}

				if !func.args.is_empty() {
					// Normalize arg names so each renders exactly one `$` sigil.
					let args_str: Vec<String> = func
						.args
						.iter()
						.map(|(n, k)| {
							let name = n.strip_prefix('$').unwrap_or(n);
							format!("${name}: {k}")
						})
						.collect();
					out.push_str(&format!("**Parameters:** `{}`\n", args_str.join(", ")));
				}

				if let Some(ret) = &func.returns {
					out.push_str(&format!("**Returns:** `{ret}`\n"));
				}

				out.push('\n');
			}
		}

		out
	}
}

/// Escape text for use inside a markdown table cell.
///
/// Pipes are backslash-escaped so they do not terminate the cell, and
/// newlines are replaced with spaces — a multi-line comment or default
/// value would otherwise break the table row across lines.
fn escape_markdown_table(s: &str) -> String {
	s.replace('|', "\\|").replace('\n', " ")
}
1104
1105fn format_dependency_node(out: &mut String, node: &DependencyNode, indent: usize) {
1106	let prefix = "  ".repeat(indent);
1107	let field_label = node
1108		.field
1109		.as_deref()
1110		.map(|f| format!(".{f} -> "))
1111		.unwrap_or_default();
1112	let cycle_label = if node.is_cycle { " (cycle)" } else { "" };
1113	out.push_str(&format!(
1114		"{prefix}{field_label}[{}]{cycle_label}\n",
1115		node.table
1116	));
1117	if !node.is_cycle {
1118		for child in &node.children {
1119			format_dependency_node(out, child, indent + 1);
1120		}
1121	}
1122}
1123
1124// ─── Token & AST Extraction ───
1125
1126fn token_text<'a>(source: &'a str, token: &crate::upstream::syn::token::Token) -> &'a str {
1127	let start = token.span.offset as usize;
1128	let end = (token.span.offset + token.span.len) as usize;
1129	if end <= source.len() {
1130		&source[start..end]
1131	} else {
1132		tracing::warn!(
1133			"Token span [{start}..{end}] exceeds source length {}, returning empty",
1134			source.len()
1135		);
1136		""
1137	}
1138}
1139
1140fn expr_to_string(expr: &crate::Expr) -> String {
1141	let mut s = String::new();
1142	expr.fmt_sql(&mut s, SqlFormat::SingleLine);
1143	let s = s.strip_prefix('`').unwrap_or(&s);
1144	let s = s.strip_suffix('`').unwrap_or(s);
1145	let s = s.strip_prefix('\u{27E8}').unwrap_or(s);
1146	let s = s.strip_suffix('\u{27E9}').unwrap_or(s);
1147	s.to_string()
1148}
1149
/// Check if a table's NS/DB matches the requested scope.
/// Tables with no NS/DB (None) match any scope (they're in the default context).
fn scope_matches(
	table_ns: &Option<String>,
	table_db: &Option<String>,
	filter_ns: Option<&str>,
	filter_db: Option<&str>,
) -> bool {
	// One scope component matches unless BOTH sides are present and the
	// names differ (case-insensitively). An unscoped table or an
	// unfiltered query always passes.
	fn component_matches(declared: &Option<String>, requested: Option<&str>) -> bool {
		match (declared.as_deref(), requested) {
			(Some(d), Some(r)) => d.eq_ignore_ascii_case(r),
			_ => true,
		}
	}
	component_matches(table_ns, filter_ns) && component_matches(table_db, filter_db)
}
1170
1171fn extract_comment(expr: &crate::Expr) -> Option<String> {
1172	use crate::upstream::sql::Literal;
1173	match expr {
1174		crate::Expr::Literal(Literal::None) => None,
1175		crate::Expr::Literal(Literal::String(s)) => Some(s.clone()),
1176		other => {
1177			let s = expr_to_string(other);
1178			let trimmed = s.trim_matches('\'').trim_matches('"');
1179			if trimmed.is_empty() {
1180				None
1181			} else {
1182				Some(trimmed.to_string())
1183			}
1184		}
1185	}
1186}
1187
1188fn extract_record_links(kind: &crate::Kind) -> Vec<String> {
1189	let mut s = String::new();
1190	kind.fmt_sql(&mut s, SqlFormat::SingleLine);
1191
1192	let mut links = Vec::new();
1193	let mut remaining = s.as_str();
1194	while let Some(pos) = remaining.find("record<") {
1195		let after = &remaining[pos + 7..];
1196		if let Some(end) = after.find('>') {
1197			let tables_str = &after[..end];
1198			for table in tables_str.split('|') {
1199				let table = table.trim();
1200				if !table.is_empty() {
1201					links.push(table.to_string());
1202				}
1203			}
1204		}
1205		remaining = &remaining[pos + 7..];
1206	}
1207	links
1208}