pub mod build_schema;
pub mod emit;
pub mod examples;
pub mod lint;
pub mod parse;
pub mod translate;
use std::path::PathBuf;
use clap::{Parser, Subcommand};
use crate::dialect::DialectId;
use crate::encoding::Encoding;
use crate::error::Result;
use crate::lint::format::Format as LintFormat;
/// Long-form help text for the `build-schema` subcommand.
///
/// Attached to `Command::BuildSchema` through `#[command(long_about = ...)]`.
/// The `\` directly after the opening quote is a line continuation, so the text
/// starts at "Compile ..." rather than with an empty line. Everything between the
/// quotes is user-facing output and must be edited as such.
const BUILD_SCHEMA_LONG_ABOUT: &str = "\
Compile a reusable schema artifact from one or more SQL files.
Reads each `--schema` file in CLI order, replays the DDL surface
(CREATE/ALTER/DROP TABLE, CREATE INDEX, foreign-key constraints,
CREATE DATABASE / USE for per-database namespacing), and emits a JSON
artifact that captures the *current* state of the schema — not just the
initial CREATE.
Use cases:
- Compile a long migration history once, lint many times against the
compiled artifact (cheap on every CI run).
- Check the artifact into the repo so contributors lint against the
same schema without each running the full migration replay.
- Mix `.json` + late `.sql` migrations on top:
sqlt lint --from mariadb \\
--schema schema.json \\
--schema migrations/late.sql query.sql
What's tracked:
- tables (per database)
- columns (name, data type, nullable)
- indexes (named, unique, primary, fulltext, spatial; functional via
the rendered SQL expression)
- primary keys, foreign keys (resolved through the USE cursor)
Statements that don't affect the schema (INSERT, GRANT, DELIMITER +
stored procedure bodies, …) emit `note: skipping <kind> at <file>:<line>`
on stderr but never error.
The artifact records the sqlt version it was built with — `sqlt lint`
warns on major.minor mismatch but still tries to load.
For real-world examples (multi-database, latin1, late-migration mixing,
etc.) run:
sqlt build-schema --examples";
/// Long-form help text for the `lint` subcommand.
///
/// Attached to `Command::Lint` through `#[command(long_about = ...)]`.
/// It summarises the rule categories, the discoverability flags, the `--format`
/// values and the `--exit-on` thresholds. The `\` directly after the opening
/// quote is a line continuation, so the text does not begin with an empty line.
/// Everything between the quotes is user-facing output.
const LINT_LONG_ABOUT: &str = "\
Analyze SQL for pitfalls and improvement suggestions.
Runs a curated ruleset over the parsed AST and reports diagnostics with
stable rule IDs (e.g. `SQLT0500`), short slugs (`select-star`), and inline
suggestions.
Rule categories:
- raw (SQLT00xx) Raw passthrough (off by default; see -v)
- dialect-xc (SQLT01xx) Dialect cross-contamination
- pre-flight (SQLT02xx) Translation pre-flight (only with --to)
- joins (SQLT03xx) Implicit cross joins, NATURAL JOIN, ON 1=1
- subquery (SQLT04xx) IN (SELECT) -> EXISTS, correlated subqueries
- perf (SQLT05xx) SELECT *, leading-wildcard LIKE, fn-on-column
- correctness (SQLT06xx) = NULL, UPDATE/DELETE without WHERE
- style (SQLT07xx) Unaliased derived tables, LIMIT without ORDER BY
- ddl (SQLT08xx) Float-for-money, VARCHAR without length
Common discoverability flags:
- sqlt lint --examples in-depth usage examples
- sqlt lint --list-rules every registered rule with one-line summary
- sqlt lint --explain <ID> long-form documentation for a rule
Output formats (--format):
- text grep-friendly single-line per finding (default)
- pretty grouped per file with snippet pointer and inline rule explanation
- json structured for tooling / CI ingestion
- sarif SARIF 2.1.0 for GitHub code-scanning integration
Exit-code controls:
- --exit-on error (default) exit 1 only on errors
- --exit-on warning exit 1 on errors and warnings
- --exit-on info exit 1 on any finding
- --severity is OUTPUT-only and does NOT affect --exit-on";
/// Long-form help text for the top-level `sqlt` command.
///
/// Attached to `Cli` through `#[command(long_about = ...)]`. It lists the
/// supported dialects and encodings, explains how `--from mariadb` is handled,
/// and documents the input sources and exit codes. The `\` directly after the
/// opening quote is a line continuation, so the text does not begin with an
/// empty line. Everything between the quotes is user-facing output.
const TOP_LEVEL_LONG_ABOUT: &str = "\
Multi-dialect SQL parser, translator, and linter.
Supported dialects:
- mysql MySQL 5.7+ / 8.0
- mariadb MariaDB — see note below
- postgres (aliases: postgresql, pg) PostgreSQL
- mssql (aliases: tsql, sqlserver) Microsoft SQL Server / T-SQL
- sqlite SQLite
- generic Permissive fallback dialect
About `--from mariadb`:
At the parser layer MariaDB uses sqlparser's MySqlDialect (a wrapper
fails the dialect_of!(MySqlDialect) downcast checks scattered through
sqlparser, silently disabling MySQL-superset features MariaDB needs).
MariaDB-specific behaviour lives one layer up:
- input preprocessor unwraps mariadb-dump conditional comments
(/*!NNN ... */, /*M!NNN ... */) and relaxes bare --<EOL>
- per-statement fallback wraps unparseable MariaDB syntax as
`Raw` fragments classified by reason (system_versioning,
create_package, optimization_hint, delimiter, ...)
- capability table treats MariaDB distinctly from MySQL for
RETURNING, CREATE SEQUENCE, system versioning, etc.
- lint rules can branch on the source dialect (e.g. SQLT0104
fires for --from mysql but not --from mariadb)
Supported encodings (--encoding / -e):
- utf-8 (default; always used for JSON I/O)
- iso-8859-1 (alias: latin1)
- windows-1252 (alias: cp1252, win1252)
Reads input from:
- a file path (positional argument)
- stdin (when no path is given, or when `-` is passed)
Discoverability:
- sqlt --examples top-level overview + per-command examples
- sqlt <COMMAND> --help full long-form help for any subcommand
- sqlt <COMMAND> --examples in-depth usage examples
- sqlt lint --list-rules every registered lint rule with id + summary
- sqlt lint --explain <ID> long-form documentation for one rule
Exit codes:
- 0 clean
- 1 parse error, encoding error, or lint findings >= --exit-on threshold
- 2 usage error (unknown dialect, unknown rule, bad flag combination)
- 3 `translate --strict` saw at least one warning";
// Top-level command-line interface of the `sqlt` binary, parsed with clap's derive API.
//
// Plain `//` comments are used on purpose inside this item: clap's derive turns `///`
// doc comments into help text, which would change what `--help` prints.
//
// `arg_required_else_help` and `subcommand_required` are both switched off, so a bare
// `sqlt` invocation parses successfully with `command == None`; `run()` prints the help
// itself in that case.
#[derive(Debug, Parser)]
#[command(
name = "sqlt",
version,
about = "Multi-dialect SQL parser, translator, and linter",
long_about = TOP_LEVEL_LONG_ABOUT,
arg_required_else_help = false,
subcommand_required = false,
)]
pub struct Cli {
// Top-level `--examples` switch. `run()` checks it before looking at the subcommand and
// prints `examples::TOP_LEVEL`. It is not global: each subcommand declares its own
// `--examples` flag.
#[arg(long = "examples", global = false)]
pub examples: bool,
// The selected subcommand, or `None` when the binary is invoked without one.
#[command(subcommand)]
pub command: Option<Command>,
}
// Subcommands of `sqlt`. `run()` passes each variant's argument struct to the `run`
// function of the sibling module (`parse`, `emit`, `translate`, `lint`, `build_schema`).
//
// Plain `//` comments are used on purpose: clap's derive would turn `///` doc comments
// on the variants into subcommand help text.
#[derive(Debug, Subcommand)]
pub enum Command {
// Dispatched to `parse::run`.
Parse(ParseArgs),
// Dispatched to `emit::run`.
Emit(EmitArgs),
// Dispatched to `translate::run`.
Translate(TranslateArgs),
// Dispatched to `lint::run`; long help comes from `LINT_LONG_ABOUT`.
#[command(long_about = LINT_LONG_ABOUT)]
Lint(LintArgs),
// Dispatched to `build_schema::run`; long help comes from `BUILD_SCHEMA_LONG_ABOUT`.
#[command(long_about = BUILD_SCHEMA_LONG_ABOUT)]
BuildSchema(BuildSchemaArgs),
}
// Arguments of the `parse` subcommand (consumed by `parse::run`).
//
// Plain `//` comments are used on purpose: clap's derive would turn `///` doc comments
// into help text.
#[derive(Debug, clap::Args)]
pub struct ParseArgs {
// `--from <DIALECT>`: source dialect, validated by `parse_dialect`. Mandatory unless
// `--examples` is given, hence the `Option`.
#[arg(long = "from", value_parser = parse_dialect, required_unless_present = "examples")]
pub from: Option<DialectId>,
// `--pretty` switch. NOTE(review): presumably pretty-prints the emitted output —
// confirm in `parse::run`.
#[arg(long)]
pub pretty: bool,
// `--encoding` / `-e`: text encoding, validated by `parse_encoding`; defaults to "utf-8".
#[arg(long, short = 'e', value_parser = parse_encoding, default_value = "utf-8")]
pub encoding: Encoding,
// Optional positional input path. The top-level help documents that a missing path or
// `-` means stdin.
pub input: Option<PathBuf>,
// `--examples` switch of this subcommand; also lifts the `--from` requirement.
#[arg(long = "examples")]
pub examples: bool,
}
// Arguments of the `emit` subcommand (consumed by `emit::run`).
//
// Plain `//` comments are used on purpose: clap's derive would turn `///` doc comments
// into help text.
#[derive(Debug, clap::Args)]
pub struct EmitArgs {
// `--to <DIALECT>`: target dialect, validated by `parse_dialect`. Unlike the `--from` /
// `--to` options of `parse` and `translate`, this one is never required by clap.
// NOTE(review): the fallback used when it is absent lives in `emit::run` — confirm there.
#[arg(long = "to", value_parser = parse_dialect)]
pub to: Option<DialectId>,
// `--encoding` / `-e`: text encoding, validated by `parse_encoding`; defaults to "utf-8".
#[arg(long, short = 'e', value_parser = parse_encoding, default_value = "utf-8")]
pub encoding: Encoding,
// Optional positional input path. The top-level help documents that a missing path or
// `-` means stdin.
pub input: Option<PathBuf>,
// `--examples` switch of this subcommand.
#[arg(long = "examples")]
pub examples: bool,
}
// Arguments of the `translate` subcommand (consumed by `translate::run`).
//
// Plain `//` comments are used on purpose: clap's derive would turn `///` doc comments
// into help text.
#[derive(Debug, clap::Args)]
pub struct TranslateArgs {
// `--from <DIALECT>`: source dialect, validated by `parse_dialect`. Mandatory unless
// `--examples` is given, hence the `Option`.
#[arg(long = "from", value_parser = parse_dialect, required_unless_present = "examples")]
pub from: Option<DialectId>,
// `--to <DIALECT>`: target dialect, validated by `parse_dialect`. Mandatory unless
// `--examples` is given, hence the `Option`.
#[arg(long = "to", value_parser = parse_dialect, required_unless_present = "examples")]
pub to: Option<DialectId>,
// `--strict` switch. The top-level help documents exit code 3 for a `translate --strict`
// run that saw at least one warning.
#[arg(long)]
pub strict: bool,
// `--encoding` / `-e`: text encoding, validated by `parse_encoding`; defaults to "utf-8".
#[arg(long, short = 'e', value_parser = parse_encoding, default_value = "utf-8")]
pub encoding: Encoding,
// Optional positional input path. The top-level help documents that a missing path or
// `-` means stdin.
pub input: Option<PathBuf>,
// `--examples` switch of this subcommand; also lifts the `--from` / `--to` requirement.
#[arg(long = "examples")]
pub examples: bool,
}
/// clap `value_parser` adapter for dialect options such as `--from` and `--to`.
///
/// Delegates to `DialectId`'s string parsing and renders a parse failure with
/// `to_string()`, yielding the `Result<_, String>` shape used by the
/// `value_parser = parse_dialect` attributes in this file.
fn parse_dialect(s: &str) -> std::result::Result<DialectId, String> {
    let parsed: std::result::Result<DialectId, _> = s.parse();
    parsed.map_err(|err| err.to_string())
}
// Severity threshold accepted on the command line, spelled in kebab-case
// (`error`, `warning`, `info`).
//
// `LintArgs` uses it for two independent options: `--severity` (default `info`) and
// `--exit-on` (default `error`). The lint help text describes `--exit-on` as the level
// from which findings produce exit code 1, and `--severity` as output-only.
//
// Plain `//` comments are used on purpose: clap's derive would turn `///` doc comments
// on the variants into help text for the possible values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
#[clap(rename_all = "kebab-case")]
pub enum ExitOn {
Error,
Warning,
Info,
}
// Command-line spelling of the lint help mode (`auto`, `always`, `never`), used by
// `LintArgs::help_mode` for the `--help-mode` option.
//
// It mirrors `crate::lint::format::HelpMode` variant for variant; the `From` impl that
// follows converts between the two.
//
// Plain `//` comments are used on purpose: clap's derive would turn `///` doc comments
// on the variants into help text for the possible values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
#[clap(rename_all = "kebab-case")]
pub enum CliHelpMode {
Auto,
Always,
Never,
}
/// Converts the CLI-facing help mode into the formatter's own `HelpMode`.
///
/// The mapping is one-to-one by variant name. The `match` has no wildcard arm, so
/// adding a variant to `CliHelpMode` is a compile error until it is mapped here.
impl From<CliHelpMode> for crate::lint::format::HelpMode {
    fn from(mode: CliHelpMode) -> Self {
        match mode {
            CliHelpMode::Never => Self::Never,
            CliHelpMode::Always => Self::Always,
            CliHelpMode::Auto => Self::Auto,
        }
    }
}
// Arguments of the `lint` subcommand (consumed by `lint::run`). The user-facing
// description of these options lives in `LINT_LONG_ABOUT`.
//
// Plain `//` comments are used on purpose: clap's derive would turn `///` doc comments
// into help text.
#[derive(Debug, clap::Args)]
pub struct LintArgs {
// `--from <DIALECT>`: source dialect, validated by `parse_dialect`. Not required by clap.
#[arg(long = "from", value_parser = parse_dialect)]
pub from: Option<DialectId>,
// `--to <DIALECT>`: optional target dialect. The lint help lists the translation
// pre-flight rules (SQLT02xx) as running "only with --to".
#[arg(long = "to", value_parser = parse_dialect)]
pub to: Option<DialectId>,
// `--format`: output format, defaulting to `LintFormat::Text`. The lint help lists
// text, pretty, json and sarif.
#[arg(long, value_enum, default_value_t = LintFormat::Text)]
pub format: LintFormat,
// `--help-mode`: auto / always / never, defaulting to auto. Converts into
// `crate::lint::format::HelpMode`.
// NOTE(review): presumably controls the inline rule help in the output — confirm in `lint::run`.
#[arg(long = "help-mode", value_enum, default_value_t = CliHelpMode::Auto)]
pub help_mode: CliHelpMode,
// `--no-help` switch; clap rejects it when combined with `--help-mode`.
// NOTE(review): presumably a shorthand for `--help-mode never` — confirm in `lint::run`.
#[arg(long = "no-help", conflicts_with = "help_mode")]
pub no_help: bool,
// `--verbose` / `-v` switch. The lint help refers to `-v` for the raw-passthrough rules
// that are off by default.
#[arg(long = "verbose", short = 'v')]
pub verbose: bool,
// `--rule`, repeatable (collected into a Vec).
// NOTE(review): presumably restricts the run to the named rules — confirm in `lint::run`.
#[arg(long = "rule")]
pub rule: Vec<String>,
// `--no-rule`, repeatable (collected into a Vec).
// NOTE(review): presumably disables the named rules — confirm in `lint::run`.
#[arg(long = "no-rule")]
pub no_rule: Vec<String>,
// `--severity`, defaulting to `info`. Per the lint help this is output-only and does not
// affect `--exit-on`.
#[arg(long = "severity", value_enum, default_value_t = ExitOn::Info)]
pub severity: ExitOn,
// `--exit-on`, defaulting to `error`: the level from which findings make the process
// exit with code 1 (see the "Exit-code controls" section of the lint help).
#[arg(long = "exit-on", value_enum, default_value_t = ExitOn::Error)]
pub exit_on: ExitOn,
// `--explain <ID>`: per the lint help, prints the long-form documentation for one rule.
#[arg(long = "explain")]
pub explain: Option<String>,
// `--list-rules`: per the lint help, lists every registered rule with a one-line summary.
#[arg(long = "list-rules")]
pub list_rules: bool,
// `--schema <PATH>`, repeatable. The build-schema help shows a compiled `.json` artifact
// being mixed with later `.sql` migrations this way.
#[arg(long = "schema")]
pub schemas: Vec<PathBuf>,
// `--encoding` / `-e`: text encoding, validated by `parse_encoding`; defaults to "utf-8".
#[arg(long, short = 'e', value_parser = parse_encoding, default_value = "utf-8")]
pub encoding: Encoding,
// Optional positional input path. The top-level help documents that a missing path or
// `-` means stdin.
pub input: Option<PathBuf>,
// `--examples` switch of this subcommand.
#[arg(long = "examples")]
pub examples: bool,
}
/// clap `value_parser` adapter for the `--encoding` / `-e` option.
///
/// Delegates to `Encoding`'s string parsing and renders a parse failure with
/// `to_string()`, yielding the `Result<_, String>` shape used by the
/// `value_parser = parse_encoding` attributes in this file.
fn parse_encoding(s: &str) -> std::result::Result<Encoding, String> {
    let parsed: std::result::Result<Encoding, _> = s.parse();
    parsed.map_err(|err| err.to_string())
}
// Arguments of the `build-schema` subcommand (consumed by `build_schema::run`). The
// user-facing description lives in `BUILD_SCHEMA_LONG_ABOUT`.
//
// Plain `//` comments are used on purpose: clap's derive would turn `///` doc comments
// into help text.
#[derive(Debug, clap::Args)]
pub struct BuildSchemaArgs {
// `--from <DIALECT>`: source dialect, validated by `parse_dialect`. Not required by clap.
#[arg(long = "from", value_parser = parse_dialect)]
pub from: Option<DialectId>,
// `--schema <PATH>`, repeatable. Per the build-schema help the files are read in CLI order.
#[arg(long = "schema")]
pub schemas: Vec<PathBuf>,
// `--encoding` / `-e`: text encoding, validated by `parse_encoding`; defaults to "utf-8".
#[arg(long, short = 'e', value_parser = parse_encoding, default_value = "utf-8")]
pub encoding: Encoding,
// `--output` / `-o`: optional destination path.
// NOTE(review): presumably the artifact goes to stdout when absent — confirm in
// `build_schema::run`.
#[arg(long, short = 'o')]
pub output: Option<PathBuf>,
// `--pretty` switch. NOTE(review): presumably pretty-prints the JSON artifact — confirm
// in `build_schema::run`.
#[arg(long)]
pub pretty: bool,
// `--examples` switch of this subcommand.
#[arg(long = "examples")]
pub examples: bool,
}
/// Entry point of the `sqlt` command line: parses the process arguments and dispatches.
///
/// Steps, in order:
/// 1. A top-level `--examples` prints `examples::TOP_LEVEL` and returns.
/// 2. Without a subcommand, the help is printed followed by a newline, and the call
///    still succeeds.
/// 3. Otherwise the matching subcommand module's `run` is invoked with its arguments.
///
/// # Errors
///
/// Returns whatever error the selected subcommand's `run` returns. Printing the help
/// in step 2 is best effort and never fails the call.
pub fn run() -> Result<()> {
    use std::io::Write;

    let cli = Cli::parse();

    if cli.examples {
        examples::print(examples::TOP_LEVEL);
        return Ok(());
    }

    let Some(command) = cli.command else {
        let mut cmd = <Cli as clap::CommandFactory>::command();
        // Help output is best effort: a closed stdout (e.g. `sqlt | head -0`) must not
        // turn a bare invocation into a failure.
        cmd.print_help().ok();
        // `println!` would panic if stdout cannot be written, defeating the `.ok()` above,
        // so the trailing newline is written with an ignorable `writeln!` instead.
        let _ = writeln!(std::io::stdout());
        return Ok(());
    };

    match command {
        Command::Parse(args) => parse::run(args),
        Command::Emit(args) => emit::run(args),
        Command::Translate(args) => translate::run(args),
        Command::Lint(args) => lint::run(args),
        Command::BuildSchema(args) => build_schema::run(args),
    }
}
/// Reads the complete raw input for a subcommand.
///
/// * `Some(path)` with any value other than `-` reads that file.
/// * `None`, or the placeholder path `-`, reads standard input until EOF.
///
/// # Errors
///
/// Propagates the I/O error from reading the file or stdin.
pub(crate) fn read_input_bytes(path: Option<&std::path::Path>) -> Result<Vec<u8>> {
    use std::io::Read;

    // Collapse "no path" and the `-` placeholder into a single stdin case.
    match path.filter(|candidate| candidate.as_os_str() != "-") {
        Some(file) => Ok(std::fs::read(file)?),
        None => {
            let mut data = Vec::new();
            std::io::stdin().read_to_end(&mut data)?;
            Ok(data)
        }
    }
}
/// Reads the input selected by `path` (see `read_input_bytes`) and decodes it with `enc`.
///
/// # Errors
///
/// Returns the read error if the input cannot be read, otherwise whatever
/// `Encoding::decode` reports for the bytes.
pub(crate) fn read_input_text(path: Option<&std::path::Path>, enc: Encoding) -> Result<String> {
    read_input_bytes(path).and_then(|raw| enc.decode(&raw))
}
/// Encodes `s` with `enc` and writes it to standard output, followed by one `\n`.
///
/// Stdout stays locked across both writes so the payload and its trailing newline
/// are not interleaved with output from other threads.
///
/// # Errors
///
/// Returns the error from `Encoding::encode`, or the I/O error from writing to stdout.
pub(crate) fn write_sql(s: &str, enc: Encoding) -> Result<()> {
    use std::io::Write;

    let encoded = enc.encode(s)?;
    let mut handle = std::io::stdout().lock();
    handle.write_all(&encoded)?;
    handle.write_all(b"\n")?;
    Ok(())
}