Skip to main content

surql_parser/
lint.rs

1//! SurrealQL linter — static analysis for common schema issues.
2//!
3//! Provides lint checks for `.surql` files including missing types,
4//! schemaless tables, SELECT *, missing indexes, and unused functions.
5//!
6//! # Example
7//!
8//! ```
9//! use surql_parser::lint::{lint_schema, LintSeverity};
10//! use surql_parser::SchemaGraph;
11//! use std::path::PathBuf;
12//!
13//! let source = "
14//!     DEFINE TABLE user;
15//!     DEFINE FIELD name ON user;
16//!     SELECT * FROM user;
17//! ";
18//! let graph = SchemaGraph::from_source(source).unwrap();
19//! let sources = vec![(PathBuf::from("schema.surql"), source.to_string())];
20//! let results = lint_schema(&graph, &sources);
21//! assert!(results.iter().any(|r| r.code == "schemaless-table"));
22//! assert!(results.iter().any(|r| r.code == "missing-type"));
23//! assert!(results.iter().any(|r| r.code == "select-star"));
24//! ```
25
26use std::collections::HashSet;
27use std::path::PathBuf;
28
29use crate::SchemaGraph;
30
/// How serious a lint finding is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LintSeverity {
	/// Displayed as `warn`.
	Warning,
	/// Displayed as `info`.
	Info,
}

/// Renders the severity as its short lowercase label.
impl std::fmt::Display for LintSeverity {
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		let label = match self {
			Self::Warning => "warn",
			Self::Info => "info",
		};
		f.write_str(label)
	}
}
46
/// A single lint finding, tied to a position in one source file.
#[derive(Debug, Clone)]
pub struct LintResult {
	/// Path of the file the finding was reported against, as a display string.
	pub file: String,
	/// 1-based line number of the finding.
	pub line: u32,
	/// 1-based column of the finding (derived from a byte offset within the line).
	pub col: u32,
	/// Identifier of the lint that fired, e.g. `missing-type` or `select-star`.
	pub code: String,
	/// Human-readable description of the finding.
	pub message: String,
	/// How serious the finding is.
	pub severity: LintSeverity,
}
57
58impl std::fmt::Display for LintResult {
59	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60		write!(
61			f,
62			"{}:{}:{} [{}] {}",
63			self.file, self.line, self.col, self.code, self.message
64		)
65	}
66}
67
68/// Run all lint checks against a schema graph and its source files.
69///
70/// `sources` is a list of (file_path, file_content) pairs so we can report
71/// accurate line/column positions and detect SELECT * patterns.
72pub fn lint_schema(schema: &SchemaGraph, sources: &[(PathBuf, String)]) -> Vec<LintResult> {
73	let mut results = Vec::new();
74
75	lint_missing_type(schema, sources, &mut results);
76	lint_schemaless_table(schema, sources, &mut results);
77	lint_select_star(sources, &mut results);
78	lint_missing_index(schema, sources, &mut results);
79	lint_unused_function(schema, sources, &mut results);
80
81	results.sort_by(|a, b| {
82		a.file
83			.cmp(&b.file)
84			.then_with(|| a.line.cmp(&b.line))
85			.then_with(|| a.col.cmp(&b.col))
86	});
87
88	results
89}
90
/// Apply auto-fixes for fixable lints. Returns the fixed source content
/// and the number of fixes applied.
///
/// Currently fixable: `missing-type` (adds `TYPE any`).
///
/// Only single-line `DEFINE FIELD` statements (the whole statement on one line,
/// ending with a semicolon) are rewritten; multi-line statements are left alone
/// to avoid corrupting them. Every line keeps its original terminator, so
/// `\r\n` files stay `\r\n` and a missing final newline stays missing.
pub fn apply_fixes(source: &str) -> (String, u32) {
	let mut fixed = String::with_capacity(source.len());
	let mut fix_count = 0u32;

	// `split_inclusive` keeps each terminator attached to its line, which lets
	// us write back exactly the line ending that was read.
	for raw_line in source.split_inclusive('\n') {
		let (line, ending) = split_line_ending(raw_line);
		match untyped_field_statement(line) {
			Some(statement) => {
				let leading_ws = &line[..line.len() - line.trim_start().len()];
				fixed.push_str(leading_ws);
				fixed.push_str(statement);
				fixed.push_str(" TYPE any;");
				fix_count += 1;
			}
			None => fixed.push_str(line),
		}
		fixed.push_str(ending);
	}

	(fixed, fix_count)
}

/// Splits a line produced by `split_inclusive('\n')` into (content, terminator).
fn split_line_ending(raw_line: &str) -> (&str, &str) {
	if let Some(content) = raw_line.strip_suffix("\r\n") {
		(content, "\r\n")
	} else if let Some(content) = raw_line.strip_suffix('\n') {
		(content, "\n")
	} else {
		(raw_line, "")
	}
}

/// Returns the statement text (trimmed, without its trailing `;`) when `line`
/// is a single-line `DEFINE FIELD` statement with no TYPE/FLEXIBLE clause.
fn untyped_field_statement(line: &str) -> Option<&str> {
	let statement = line.trim().strip_suffix(';')?.trim_end();

	// An inner `;` or a comment marker means the final `;` may not terminate
	// the DEFINE FIELD statement itself. Appending there could corrupt the
	// line, so such lines are conservatively left untouched.
	let ambiguous = [";", "--", "//", "/*", "#"]
		.iter()
		.any(|marker| statement.contains(*marker));
	if ambiguous {
		return None;
	}

	let tokens: Vec<&str> = statement.split_whitespace().collect();
	let after_keywords = match tokens.as_slice() {
		[define, field, rest @ ..]
			if define.eq_ignore_ascii_case("DEFINE") && field.eq_ignore_ascii_case("FIELD") =>
		{
			rest
		}
		_ => return None,
	};
	let after_modifier = match after_keywords {
		[a, b, c, rest @ ..]
			if a.eq_ignore_ascii_case("IF")
				&& b.eq_ignore_ascii_case("NOT")
				&& c.eq_ignore_ascii_case("EXISTS") =>
		{
			rest
		}
		[a, rest @ ..] if a.eq_ignore_ascii_case("OVERWRITE") => rest,
		rest => rest,
	};

	// Skip the field name so a field that is itself called `type` is not
	// mistaken for a TYPE clause.
	let (_field_name, clauses) = after_modifier.split_first()?;
	let has_type_clause = clauses
		.iter()
		.any(|token| token.eq_ignore_ascii_case("TYPE") || token.eq_ignore_ascii_case("FLEXIBLE"));

	if has_type_clause {
		None
	} else {
		Some(statement)
	}
}
132
133// ─── Individual Lints ───
134
135/// Warn on DEFINE FIELD without TYPE annotation.
136fn lint_missing_type(
137	schema: &SchemaGraph,
138	sources: &[(PathBuf, String)],
139	results: &mut Vec<LintResult>,
140) {
141	for table_name in schema.table_names() {
142		for field in schema.fields_of(table_name) {
143			if field.kind.is_none() {
144				let Some((file, line, col)) = find_field_location(sources, table_name, &field.name)
145				else {
146					continue;
147				};
148				results.push(LintResult {
149					file,
150					line,
151					col,
152					code: "missing-type".into(),
153					message: format!(
154						"DEFINE FIELD {} ON {} \u{2014} no TYPE specified",
155						field.name, table_name
156					),
157					severity: LintSeverity::Warning,
158				});
159			}
160		}
161	}
162}
163
164/// Warn on DEFINE TABLE without SCHEMAFULL.
165fn lint_schemaless_table(
166	schema: &SchemaGraph,
167	sources: &[(PathBuf, String)],
168	results: &mut Vec<LintResult>,
169) {
170	for table_name in schema.table_names() {
171		if let Some(table) = schema.table(table_name)
172			&& !table.full
173		{
174			let Some((file, line, col)) = find_define_table_location(sources, table_name) else {
175				continue;
176			};
177			results.push(LintResult {
178				file,
179				line,
180				col,
181				code: "schemaless-table".into(),
182				message: format!("DEFINE TABLE {} \u{2014} consider SCHEMAFULL", table_name),
183				severity: LintSeverity::Warning,
184			});
185		}
186	}
187}
188
189/// Warn on SELECT * usage.
190fn lint_select_star(sources: &[(PathBuf, String)], results: &mut Vec<LintResult>) {
191	for (path, content) in sources {
192		let file_str = path.display().to_string();
193		for (line_num, line) in content.lines().enumerate() {
194			let trimmed = line.trim();
195			if trimmed.starts_with("--") || trimmed.starts_with("//") {
196				continue;
197			}
198			let upper = line.to_uppercase();
199			if let Some(pos) = upper.find("SELECT *") {
200				// Skip if SELECT * appears after a comment marker or inside a string
201				let before_select = &line[..pos];
202				if before_select.contains("--")
203					|| before_select.contains("//")
204					|| is_inside_string(before_select)
205				{
206					continue;
207				}
208				let after = &upper[pos + 8..];
209				if after.trim_start().starts_with("FROM")
210					|| after.trim_start().starts_with(',')
211					|| after.trim_start().is_empty()
212				{
213					results.push(LintResult {
214						file: file_str.clone(),
215						line: (line_num + 1) as u32,
216						col: (pos + 1) as u32,
217						code: "select-star".into(),
218						message: format!(
219							"{} \u{2014} specify fields for production",
220							trimmed.trim_end_matches(';')
221						),
222						severity: LintSeverity::Info,
223					});
224				}
225			}
226		}
227	}
228}
229
/// Check whether the position right after `before` is inside a string literal.
///
/// Scans `before` left to right, tracking which quote character (`'` or `"`)
/// opened the current literal. Inside a literal the other quote character is
/// plain text and a backslash escapes the character that follows it, so
/// neither `"it's"` nor `'a\'b'` confuses the result.
fn is_inside_string(before: &str) -> bool {
	let mut open_quote: Option<char> = None;
	let mut escaped = false;

	for c in before.chars() {
		match open_quote {
			Some(quote) => {
				if escaped {
					// The previous backslash consumed this character.
					escaped = false;
				} else if c == '\\' {
					escaped = true;
				} else if c == quote {
					open_quote = None;
				}
			}
			None => {
				if c == '\'' || c == '"' {
					open_quote = Some(c);
				}
			}
		}
	}

	open_quote.is_some()
}
237
238/// Warn on tables with 5+ fields but no indexes.
239fn lint_missing_index(
240	schema: &SchemaGraph,
241	sources: &[(PathBuf, String)],
242	results: &mut Vec<LintResult>,
243) {
244	for table_name in schema.table_names() {
245		let fields = schema.fields_of(table_name);
246		let indexes = schema.indexes_of(table_name);
247		if fields.len() >= 5 && indexes.is_empty() {
248			let Some((file, line, col)) = find_define_table_location(sources, table_name) else {
249				continue;
250			};
251			results.push(LintResult {
252				file,
253				line,
254				col,
255				code: "missing-index".into(),
256				message: format!(
257					"DEFINE TABLE {} has {} fields but no indexes",
258					table_name,
259					fields.len()
260				),
261				severity: LintSeverity::Info,
262			});
263		}
264	}
265}
266
267/// Warn on DEFINE FUNCTION not called anywhere in the project.
268fn lint_unused_function(
269	schema: &SchemaGraph,
270	sources: &[(PathBuf, String)],
271	results: &mut Vec<LintResult>,
272) {
273	let all_content: String = sources
274		.iter()
275		.map(|(_, c)| c.as_str())
276		.collect::<Vec<_>>()
277		.join("\n");
278	let fn_names: Vec<&str> = schema.function_names().collect();
279
280	let mut called: HashSet<&str> = HashSet::new();
281	for name in &fn_names {
282		// Match fn::name only at word boundaries: followed by '(', ' ', ';', ')', ',', or EOL
283		let ref_pattern = format!("fn::{name}");
284		for line in all_content.lines() {
285			let trimmed = line.trim();
286			if trimmed.to_uppercase().starts_with("DEFINE FUNCTION") {
287				continue;
288			}
289			// Skip comment lines
290			if trimmed.starts_with("--") || trimmed.starts_with("//") {
291				continue;
292			}
293			if let Some(pos) = line.find(&ref_pattern) {
294				let after_pos = pos + ref_pattern.len();
295				let next_char = line[after_pos..].chars().next();
296				// Require word boundary after the function name
297				let at_boundary = match next_char {
298					None => true,
299					Some(c) => !c.is_alphanumeric() && c != '_' && c != ':',
300				};
301				if at_boundary {
302					called.insert(name);
303					break;
304				}
305			}
306		}
307	}
308
309	for name in &fn_names {
310		if !called.contains(name) {
311			let Some((file, line, col)) = find_define_function_location(sources, name) else {
312				continue;
313			};
314			results.push(LintResult {
315				file,
316				line,
317				col,
318				code: "unused-function".into(),
319				message: format!("fn::{name} is defined but never called in the project",),
320				severity: LintSeverity::Info,
321			});
322		}
323	}
324}
325
326// ─── Location Finding ───
327
/// Locate the `DEFINE FIELD <field_name> ON [TABLE] <table_name>` statement.
///
/// Returns `(file, line, col)` for the first matching line across `sources`,
/// with a 1-based line and a 1-based byte column pointing at the statement's
/// `DEFINE` keyword, or `None` when no line matches. Keywords are matched
/// ASCII case-insensitively, and the table name must end at a word boundary so
/// that `user` does not match a definition on `user_profile`.
fn find_field_location(
	sources: &[(PathBuf, String)],
	table_name: &str,
	field_name: &str,
) -> Option<(String, u32, u32)> {
	// ASCII-only uppercasing keeps byte offsets identical to the original line,
	// so the reported column is not skewed by non-ASCII text earlier on the line.
	let patterns = [
		format!("DEFINE FIELD {} ON {}", field_name, table_name).to_ascii_uppercase(),
		format!("DEFINE FIELD {} ON TABLE {}", field_name, table_name).to_ascii_uppercase(),
	];

	for (path, content) in sources {
		for (line_num, line) in content.lines().enumerate() {
			let upper = line.to_ascii_uppercase();
			let earliest = patterns
				.iter()
				.filter_map(|pattern| {
					upper
						.match_indices(pattern.as_str())
						.find(|&(start, matched)| {
							// The character after the table name must not continue
							// an identifier.
							upper[start + matched.len()..]
								.chars()
								.next()
								.map_or(true, |c| !c.is_alphanumeric() && c != '_')
						})
						.map(|(start, _)| start)
				})
				.min();

			if let Some(start) = earliest {
				return Some((
					path.display().to_string(),
					(line_num + 1) as u32,
					(start + 1) as u32,
				));
			}
		}
	}
	None
}
352
/// Locate the `DEFINE TABLE <table_name>` statement.
///
/// Returns `(file, line, col)` for the first matching line across `sources`,
/// with a 1-based line and a 1-based byte column pointing at the statement's
/// `DEFINE` keyword, or `None` when no line matches. Keywords are matched
/// ASCII case-insensitively, and the table name must end at a word boundary so
/// that `user` does not match `DEFINE TABLE user_profile`.
fn find_define_table_location(
	sources: &[(PathBuf, String)],
	table_name: &str,
) -> Option<(String, u32, u32)> {
	// ASCII-only uppercasing keeps byte offsets identical to the original line.
	let pattern_upper = format!("DEFINE TABLE {}", table_name).to_ascii_uppercase();

	for (path, content) in sources {
		for (line_num, line) in content.lines().enumerate() {
			let upper = line.to_ascii_uppercase();
			let hit = upper
				.match_indices(pattern_upper.as_str())
				.find(|&(start, matched)| {
					// The character after the table name must not continue an
					// identifier.
					upper[start + matched.len()..]
						.chars()
						.next()
						.map_or(true, |c| !c.is_alphanumeric() && c != '_')
				});

			if let Some((start, _)) = hit {
				return Some((
					path.display().to_string(),
					(line_num + 1) as u32,
					(start + 1) as u32,
				));
			}
		}
	}
	None
}
374
/// Locate the `DEFINE FUNCTION fn::<fn_name>` statement.
///
/// Returns `(file, line, col)` for the first line across `sources` that
/// defines exactly this function, with a 1-based line and a 1-based byte
/// column pointing at the `DEFINE` keyword, or `None` when there is none.
///
/// The `DEFINE FUNCTION` keywords are matched ASCII case-insensitively. The
/// first `fn::` path after them must be this function's name followed by a
/// word boundary, so `fn::get` matches neither the definition of
/// `fn::get_all` nor a definition whose body merely calls `fn::get`.
fn find_define_function_location(
	sources: &[(PathBuf, String)],
	fn_name: &str,
) -> Option<(String, u32, u32)> {
	const KEYWORD: &str = "DEFINE FUNCTION";
	let pattern = format!("fn::{fn_name}");

	for (path, content) in sources {
		for (line_num, line) in content.lines().enumerate() {
			// ASCII-only uppercasing keeps byte offsets valid for `line`.
			let upper = line.to_ascii_uppercase();
			let Some(define_pos) = upper.find(KEYWORD) else {
				continue;
			};

			let after_keyword = &line[define_pos + KEYWORD.len()..];
			let defines_this_fn = after_keyword
				.find("fn::")
				.and_then(|name_pos| after_keyword[name_pos..].strip_prefix(pattern.as_str()))
				.is_some_and(|rest| match rest.chars().next() {
					None => true,
					Some(c) => !c.is_alphanumeric() && c != '_' && c != ':',
				});

			if defines_this_fn {
				return Some((
					path.display().to_string(),
					(line_num + 1) as u32,
					(define_pos + 1) as u32,
				));
			}
		}
	}
	None
}
396
// Unit tests for the lint checks and for `apply_fixes`.
#[cfg(test)]
mod tests {
	use super::*;

	/// Builds a schema graph from `source` and lints it as a single file named
	/// `schema.surql`. Panics if the source cannot be turned into a graph.
	fn lint_source(source: &str) -> Vec<LintResult> {
		let graph = SchemaGraph::from_source(source).unwrap();
		let sources = vec![(PathBuf::from("schema.surql"), source.to_string())];
		lint_schema(&graph, &sources)
	}

	// ─── missing-type ───

	#[test]
	fn should_detect_missing_type_on_field() {
		let results = lint_source("DEFINE TABLE user SCHEMAFULL;\nDEFINE FIELD name ON user;\n");
		let missing = results.iter().filter(|r| r.code == "missing-type").count();
		assert_eq!(missing, 1, "expected 1 missing-type lint, got: {results:?}");
	}

	#[test]
	fn should_not_flag_field_with_type() {
		let results =
			lint_source("DEFINE TABLE user SCHEMAFULL;\nDEFINE FIELD name ON user TYPE string;\n");
		let missing = results.iter().filter(|r| r.code == "missing-type").count();
		assert_eq!(
			missing, 0,
			"field with TYPE should not be flagged: {results:?}"
		);
	}

	// ─── schemaless-table ───

	#[test]
	fn should_detect_schemaless_table() {
		let results = lint_source("DEFINE TABLE post;\n");
		let schemaless = results
			.iter()
			.filter(|r| r.code == "schemaless-table")
			.count();
		assert_eq!(
			schemaless, 1,
			"expected 1 schemaless-table lint: {results:?}"
		);
	}

	#[test]
	fn should_not_flag_schemafull_table() {
		let results = lint_source("DEFINE TABLE post SCHEMAFULL;\n");
		let schemaless = results
			.iter()
			.filter(|r| r.code == "schemaless-table")
			.count();
		assert_eq!(
			schemaless, 0,
			"SCHEMAFULL table should not be flagged: {results:?}"
		);
	}

	// ─── select-star ───

	#[test]
	fn should_detect_select_star() {
		let results = lint_source("DEFINE TABLE user SCHEMAFULL;\nSELECT * FROM user;\n");
		let stars = results.iter().filter(|r| r.code == "select-star").count();
		assert_eq!(stars, 1, "expected 1 select-star lint: {results:?}");
	}

	#[test]
	fn should_not_flag_explicit_select() {
		let results = lint_source("DEFINE TABLE user SCHEMAFULL;\nSELECT name, age FROM user;\n");
		let stars = results.iter().filter(|r| r.code == "select-star").count();
		assert_eq!(
			stars, 0,
			"explicit SELECT should not be flagged: {results:?}"
		);
	}

	// ─── missing-index ───

	#[test]
	fn should_detect_missing_index_with_many_fields() {
		// Exactly five fields: the smallest table the lint reports.
		let source = "\
DEFINE TABLE user SCHEMAFULL;
DEFINE FIELD name ON user TYPE string;
DEFINE FIELD email ON user TYPE string;
DEFINE FIELD age ON user TYPE int;
DEFINE FIELD bio ON user TYPE string;
DEFINE FIELD avatar ON user TYPE string;
";
		let results = lint_source(source);
		let missing_idx = results.iter().filter(|r| r.code == "missing-index").count();
		assert_eq!(
			missing_idx, 1,
			"expected 1 missing-index lint for 5 fields with no index: {results:?}"
		);
	}

	#[test]
	fn should_not_flag_table_with_index() {
		let source = "\
DEFINE TABLE user SCHEMAFULL;
DEFINE FIELD name ON user TYPE string;
DEFINE FIELD email ON user TYPE string;
DEFINE FIELD age ON user TYPE int;
DEFINE FIELD bio ON user TYPE string;
DEFINE FIELD avatar ON user TYPE string;
DEFINE INDEX email_idx ON user FIELDS email UNIQUE;
";
		let results = lint_source(source);
		let missing_idx = results.iter().filter(|r| r.code == "missing-index").count();
		assert_eq!(
			missing_idx, 0,
			"table with index should not be flagged: {results:?}"
		);
	}

	// ─── unused-function ───

	#[test]
	fn should_detect_unused_function() {
		let source = "\
DEFINE TABLE user SCHEMAFULL;
DEFINE FUNCTION fn::greet($name: string) { RETURN 'Hello, ' + $name; };
";
		let results = lint_source(source);
		let unused = results
			.iter()
			.filter(|r| r.code == "unused-function")
			.count();
		assert_eq!(unused, 1, "expected 1 unused-function lint: {results:?}");
	}

	#[test]
	fn should_not_flag_called_function() {
		let source = "\
DEFINE TABLE user SCHEMAFULL;
DEFINE FUNCTION fn::greet($name: string) { RETURN 'Hello, ' + $name; };
";
		let graph = SchemaGraph::from_source(source).unwrap();
		// The call lives in a second file: usage is searched across all sources.
		let caller = "LET $greeting = fn::greet('World');";
		let sources = vec![
			(PathBuf::from("schema.surql"), source.to_string()),
			(PathBuf::from("queries.surql"), caller.to_string()),
		];
		let results = lint_schema(&graph, &sources);
		let unused = results
			.iter()
			.filter(|r| r.code == "unused-function")
			.count();
		assert_eq!(
			unused, 0,
			"called function should not be flagged: {results:?}"
		);
	}

	// ─── apply_fixes ───

	#[test]
	fn should_apply_missing_type_fix() {
		let source = "DEFINE FIELD name ON user;\nDEFINE FIELD age ON user TYPE int;\n";
		let (fixed, count) = apply_fixes(source);
		assert_eq!(count, 1);
		assert!(fixed.contains("DEFINE FIELD name ON user TYPE any;"));
		assert!(fixed.contains("DEFINE FIELD age ON user TYPE int;"));
	}

	#[test]
	fn should_not_fix_field_with_type() {
		let source = "DEFINE FIELD name ON user TYPE string;\n";
		let (fixed, count) = apply_fixes(source);
		assert_eq!(count, 0);
		assert_eq!(fixed, source);
	}

	// ─── select-star: comments and string literals ───

	#[test]
	fn should_skip_comment_lines_for_select_star() {
		let results = lint_source("-- SELECT * FROM user;\n");
		let stars = results.iter().filter(|r| r.code == "select-star").count();
		assert_eq!(stars, 0, "comments should not trigger select-star lint");
	}

	#[test]
	fn should_skip_select_star_inside_string_literal() {
		let source = "DEFINE TABLE user SCHEMAFULL;\nLET $q = 'SELECT * FROM user';\n";
		let graph = SchemaGraph::from_source(source).unwrap();
		let sources = vec![(PathBuf::from("schema.surql"), source.to_string())];
		let results = lint_schema(&graph, &sources);
		let stars = results.iter().filter(|r| r.code == "select-star").count();
		assert_eq!(
			stars, 0,
			"SELECT * inside string literal should not be flagged"
		);
	}

	#[test]
	fn should_skip_select_star_after_inline_comment() {
		let source = "DEFINE TABLE user SCHEMAFULL;\nLET $x = 1; -- SELECT * FROM user;\n";
		let graph = SchemaGraph::from_source(source).unwrap();
		let sources = vec![(PathBuf::from("schema.surql"), source.to_string())];
		let results = lint_schema(&graph, &sources);
		let stars = results.iter().filter(|r| r.code == "select-star").count();
		assert_eq!(
			stars, 0,
			"SELECT * after inline comment should not be flagged"
		);
	}

	// ─── regression cases ───

	#[test]
	fn should_not_fix_multiline_define_field() {
		// The first line has no terminating semicolon, so it must be left alone.
		let source = "DEFINE FIELD name ON user\n\tDEFAULT 'test';\n";
		let (fixed, count) = apply_fixes(source);
		assert_eq!(count, 0, "multi-line DEFINE FIELD should not be auto-fixed");
		assert_eq!(fixed, source);
	}

	#[test]
	fn should_not_flag_fn_name_prefix_as_used() {
		let source = "\
DEFINE TABLE user SCHEMAFULL;
DEFINE FUNCTION fn::get($id: string) { RETURN 1; };
DEFINE FUNCTION fn::get_all() { RETURN 2; };
LET $x = fn::get_all();
";
		let graph = SchemaGraph::from_source(source).unwrap();
		let sources = vec![(PathBuf::from("schema.surql"), source.to_string())];
		let results = lint_schema(&graph, &sources);
		let unused: Vec<&str> = results
			.iter()
			.filter(|r| r.code == "unused-function")
			.map(|r| r.message.as_str())
			.collect();
		// The trailing space in the needle keeps `fn::get_all` messages from matching.
		assert!(
			unused.iter().any(|m| m.contains("fn::get ")),
			"fn::get should be flagged as unused (fn::get_all is called, not fn::get): {unused:?}"
		);
	}
}
617			unused.iter().any(|m| m.contains("fn::get ")),
618			"fn::get should be flagged as unused (fn::get_all is called, not fn::get): {unused:?}"
619		);
620	}
621}