Skip to main content

scythe_codegen/
validation.rs

1use std::process::Command;
2
3/// Validate generated code structurally for a given backend.
4/// Returns a list of errors (empty = passed).
5pub fn validate_structural(code: &str, backend_name: &str) -> Vec<String> {
6    match backend_name {
7        "python-psycopg3" | "python-asyncpg" | "python-aiomysql" | "python-aiosqlite" => {
8            validate_python(code)
9        }
10        "typescript-postgres"
11        | "typescript-pg"
12        | "typescript-mysql2"
13        | "typescript-better-sqlite3" => validate_typescript(code),
14        "go-pgx" => validate_go(code),
15        "java-jdbc" => validate_java(code),
16        "kotlin-jdbc" => validate_kotlin(code),
17        "csharp-npgsql" => validate_csharp(code),
18        "elixir-postgrex" | "elixir-ecto" => validate_elixir(code),
19        "ruby-pg" | "ruby-mysql2" | "ruby-sqlite3" | "ruby-trilogy" => validate_ruby(code),
20        "php-pdo" | "php-amphp" => validate_php(code),
21        // Rust backends are validated by syn, not here.
22        "rust-sqlx" | "rust-tokio-postgres" => vec![],
23        _ => vec![format!("unknown backend: {}", backend_name)],
24    }
25}
26
/// Structural checks for generated Python (target: Python 3.10+).
///
/// Looks for, in this order:
/// * the unnecessary `from __future__ import annotations` import,
/// * at least one `def` (sync or async); when there is neither a function
///   nor any class-like construct an extra "no meaningful output" message is
///   reported first,
/// * pre-3.10 typing spellings (`typing.Union`, `typing.Optional`, `List[`,
///   `Dict[`),
/// * tab characters anywhere in a line's leading indentation (only the first
///   offending line is reported).
///
/// Returns the list of messages; empty means the code passed.
fn validate_python(code: &str) -> Vec<String> {
    let mut errors: Vec<String> = Vec::new();

    // Check for pre-3.10 typing imports (should NOT be used)
    if code.contains("from __future__ import annotations") {
        errors.push(
            "unnecessary `from __future__ import annotations` — target is Python 3.10+"
                .to_string(),
        );
    }

    let has_struct = code.contains("@dataclass")
        || code.contains("(BaseModel)")
        || code.contains("(msgspec.Struct)")
        || code.contains("class ");
    // `"async def "` itself contains `"def "`, so one substring test covers both.
    let has_def = code.contains("def ");
    if !has_def {
        if !has_struct {
            // Neither a struct nor a function: nothing useful was generated.
            errors.push(
                "missing `@dataclass`/`class` and `def ` -- no meaningful output".to_string(),
            );
        }
        errors.push("missing `async def ` or `def ` (for query functions)".to_string());
    }

    let legacy_typing = [
        (
            "from typing import Union",
            "contains `from typing import Union` (pre-3.10 style)",
        ),
        (
            "from typing import Optional",
            "contains `from typing import Optional` (pre-3.10 style)",
        ),
        ("List[", "contains `List[` (use lowercase `list[`)"),
        ("Dict[", "contains `Dict[` (use lowercase `dict[`)"),
    ];
    for entry in legacy_typing.iter() {
        if code.contains(entry.0) {
            errors.push(entry.1.to_string());
        }
    }

    // Indentation must be spaces only. Inspect the whole leading-whitespace
    // run so mixed indentation (spaces followed by a tab) is caught too, not
    // just a tab in column 0. One error is enough.
    let first_tab_line = code.lines().position(|line| {
        line.chars()
            .take_while(|&c| c == ' ' || c == '\t')
            .any(|c| c == '\t')
    });
    if let Some(index) = first_tab_line {
        errors.push(format!(
            "line {} uses tab indentation (should use 4 spaces)",
            index + 1
        ));
    }

    errors
}
81
/// Structural checks for generated TypeScript.
///
/// Requires an exported function (`export function` / `export async
/// function`). A DTO declaration (`export interface`, `export type`, or a zod
/// schema) is only demanded when no function is present either, because
/// exec-only queries produce just a function. Also rejects use of the `any`
/// type; only the first offending line is reported.
///
/// Returns the list of messages; empty means the code passed.
fn validate_typescript(code: &str) -> Vec<String> {
    let mut errors = Vec::new();

    let has_function = code.contains("export async function") || code.contains("export function");
    let has_zod = code.contains("z.object(") || code.contains("z.infer<");
    let has_dto = code.contains("export interface") || code.contains("export type") || has_zod;

    if !has_dto && !has_function {
        errors.push("missing `export interface` or `export type` (for DTOs)".to_string());
    }

    if !has_function {
        errors.push("missing `export async function` or `export function`".to_string());
    }

    // Report only the first line that uses `any` as a type.
    let offender = code
        .lines()
        .map(str::trim)
        .find(|line| ts_line_uses_any_type(line));
    if let Some(line) = offender {
        errors.push(format!(
            "contains `any` type (should use `unknown` or specific): {}",
            line
        ));
    }

    errors
}

/// Returns true when `line` contains `any` as a standalone identifier in one
/// of the recognised positions: after `: `, wrapped as `<any>`, or directly
/// followed by `;`, `,` or `)`.
///
/// The identifier-boundary test on both sides is what keeps names such as
/// `company`, `insertMany` or `anything` from being reported.
fn ts_line_uses_any_type(line: &str) -> bool {
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_' || c == '$'
    }

    line.match_indices("any").any(|(start, word)| {
        let before = &line[..start];
        let after = &line[start + word.len()..];

        let standalone = before.chars().next_back().map_or(true, |c| !is_ident_char(c))
            && after.chars().next().map_or(true, |c| !is_ident_char(c));
        if !standalone {
            return false;
        }

        before.ends_with(": ")
            || (before.ends_with('<') && after.starts_with('>'))
            || matches!(after.chars().next(), Some(';') | Some(',') | Some(')'))
    })
}
122
/// Structural checks for generated Go.
///
/// Each entry in the table below pairs a failure condition with the message
/// reported for it; messages are emitted in table order. A struct is only
/// demanded when there is no function either (exec-only queries produce just
/// a function), and `json:"` tags are only demanded when a struct is present.
fn validate_go(code: &str) -> Vec<String> {
    let has_func = code.contains("func ");
    let has_struct = code.contains("type ") && code.contains("struct {");

    // Go uses tabs for indentation: any non-blank line that begins with four
    // spaces counts as space-indented.
    let space_indented = code
        .lines()
        .any(|line| line.starts_with("    ") && !line.trim().is_empty());

    let checks = [
        (
            !has_struct && !has_func,
            "missing `type ... struct {` (for structs)",
        ),
        (!has_func, "missing `func ` (for functions)"),
        (
            !code.contains("context.Context"),
            "missing `context.Context` as first param",
        ),
        (
            space_indented,
            "uses space indentation (Go standard is tabs)",
        ),
        (
            has_struct && !code.contains("json:\""),
            "missing `json:\"` tags on struct fields",
        ),
    ];

    checks
        .iter()
        .filter(|check| check.0)
        .map(|check| check.1.to_string())
        .collect()
}
163
/// Structural checks for generated Java.
///
/// Requires a `public static ` query method, a `throws SQLException` clause
/// and a try-with-resources block. A `public record ` is only demanded when
/// no static method is present either, since exec-only queries produce just
/// a method.
fn validate_java(code: &str) -> Vec<String> {
    let mut problems: Vec<String> = Vec::new();

    if !code.contains("public static ") {
        // With no method at all, a missing record is reported as well.
        if !code.contains("public record ") {
            problems.push("missing `public record ` (for DTOs)".to_string());
        }
        problems.push("missing `public static ` (for query methods)".to_string());
    }

    let required = [
        ("throws SQLException", "missing `throws SQLException`"),
        ("try (", "missing `try (` (try-with-resources)"),
    ];
    for entry in required.iter() {
        if !code.contains(entry.0) {
            problems.push(entry.1.to_string());
        }
    }

    problems
}
189
/// Structural checks for generated Kotlin.
///
/// Requires a `fun ` declaration and a `.use {` resource block. A
/// `data class ` is only demanded when no function is present either.
fn validate_kotlin(code: &str) -> Vec<String> {
    let mut problems: Vec<String> = Vec::with_capacity(3);

    match (code.contains("fun "), code.contains("data class ")) {
        // A function is present: the DTO is optional.
        (true, _) => {}
        (false, has_data_class) => {
            if !has_data_class {
                problems.push("missing `data class ` (for DTOs)".to_string());
            }
            problems.push("missing `fun ` (for functions)".to_string());
        }
    }

    let manages_resources = code.contains(".use {");
    if !manages_resources {
        problems.push("missing `.use {` (resource management)".to_string());
    }

    problems
}
210
/// Structural checks for generated C#.
///
/// Requires an async method (`async Task<` or `async Task `) and at least one
/// `await `. A `public record ` is only demanded when no async method is
/// present either.
fn validate_csharp(code: &str) -> Vec<String> {
    let returns_task = ["async Task<", "async Task "]
        .iter()
        .any(|marker| code.contains(marker));
    let has_record = code.contains("public record ");
    let awaits = code.contains("await ");

    let mut found: Vec<String> = Vec::new();
    if !returns_task && !has_record {
        found.push("missing `public record ` (for DTOs)".to_string());
    }
    if !returns_task {
        found.push("missing `async Task<` or `async Task` (for async methods)".to_string());
    }
    if !awaits {
        found.push("missing `await `".to_string());
    }
    found
}
231
/// Structural checks for generated Elixir.
///
/// Requires a function (`def ` or `defp `) and a typespec (`@type ` or
/// `@spec `). `defmodule ` and `defstruct` are only demanded when no function
/// is present either, since exec-only queries produce just a function.
fn validate_elixir(code: &str) -> Vec<String> {
    let mut problems: Vec<String> = Vec::new();

    let defines_function = code.contains("def ") || code.contains("defp ");
    if !defines_function {
        // Module and struct are only reported alongside the missing function.
        if !code.contains("defmodule ") {
            problems.push("missing `defmodule ` (for modules)".to_string());
        }
        if !code.contains("defstruct") {
            problems.push("missing `defstruct` (for structs)".to_string());
        }
        problems.push("missing `def ` or `defp ` (for functions)".to_string());
    }

    let has_typespec = ["@type ", "@spec "]
        .iter()
        .any(|marker| code.contains(marker));
    if !has_typespec {
        problems.push("missing `@type ` or `@spec ` (for typespecs)".to_string());
    }

    problems
}
258
/// Structural checks for generated Ruby.
///
/// Requires a module-level method (`def self.`), the
/// `# frozen_string_literal: true` magic comment and a `module Queries`
/// wrapper. `Data.define` is only demanded when no method is present either.
fn validate_ruby(code: &str) -> Vec<String> {
    let has_method = code.contains("def self.");
    let has_dto = code.contains("Data.define");

    let checks = [
        (!has_dto && !has_method, "missing `Data.define` (for DTOs)"),
        (!has_method, "missing `def self.` (for module methods)"),
        (
            !code.contains("# frozen_string_literal: true"),
            "missing `# frozen_string_literal: true`",
        ),
        (
            !code.contains("module Queries"),
            "missing `module Queries` wrapper",
        ),
    ];

    let mut problems = Vec::new();
    for check in checks.iter() {
        if check.0 {
            problems.push(check.1.to_string());
        }
    }
    problems
}
283
/// Structural checks for generated PHP.
///
/// Requires a `function `, the `declare(strict_types=1)` directive and the
/// `<?php` opening tag. A `readonly class ` is only demanded when no function
/// is present either.
fn validate_php(code: &str) -> Vec<String> {
    let mut problems: Vec<String> = Vec::new();

    if !code.contains("function ") {
        // With no function at all, a missing DTO class is reported as well.
        if !code.contains("readonly class ") {
            problems.push("missing `readonly class ` (for DTOs)".to_string());
        }
        problems.push("missing `function ` (for query functions)".to_string());
    }

    let required = [
        ("declare(strict_types=1)", "missing `declare(strict_types=1)`"),
        ("<?php", "missing `<?php`"),
    ];
    problems.extend(
        required
            .iter()
            .filter(|entry| !code.contains(entry.0))
            .map(|entry| entry.1.to_string()),
    );

    problems
}
308
309/// Validate generated code using real language tools (if available).
310/// Returns None if the tool is not installed, Some(errors) otherwise.
311pub fn validate_with_tools(code: &str, backend_name: &str) -> Option<Vec<String>> {
312    match backend_name {
313        name if name.starts_with("python") => validate_python_tools(code),
314        name if name.starts_with("typescript") => validate_typescript_tools(code),
315        name if name.starts_with("go") => validate_go_tools(code),
316        name if name.starts_with("ruby") => validate_ruby_tools(code),
317        name if name.starts_with("php") => validate_php_tools(code),
318        name if name.starts_with("kotlin") => validate_kotlin_tools(code),
319        _ => None,
320    }
321}
322
/// Write `code` to a fresh file in the system temp directory and return its
/// path, or `None` if the file could not be written.
///
/// `ext` is appended verbatim to the base name, so it must include the
/// leading dot (e.g. `".py"`). Trailing whitespace is trimmed from `code` and
/// exactly one newline is appended. The caller is responsible for deleting
/// the file.
///
/// The base name combines the process id with a per-process counter: the
/// counter alone restarts at zero in every process, so two processes running
/// at the same time would otherwise overwrite each other's files.
fn write_temp(code: &str, ext: &str) -> Option<std::path::PathBuf> {
    use std::sync::atomic::{AtomicU64, Ordering};
    static COUNTER: AtomicU64 = AtomicU64::new(0);
    let n = COUNTER.fetch_add(1, Ordering::Relaxed);
    let pid = std::process::id();
    // Kotlin files get a PascalCase name with no underscores; the `N`
    // separator keeps (pid, n) pairs such as (1, 23) and (12, 3) distinct.
    let basename = if ext == ".kt" {
        format!("ScytheValidate{pid}N{n}")
    } else {
        format!("scythe_validate_{pid}_{n}")
    };
    let path = std::env::temp_dir().join(format!("{basename}{ext}"));
    // Trim trailing whitespace/newlines to avoid tool complaints about blank lines at EOF
    let trimmed = format!("{}\n", code.trim_end());
    std::fs::write(&path, trimmed).ok()?;
    Some(path)
}
338
339fn validate_python_tools(code: &str) -> Option<Vec<String>> {
340    if Command::new("python3").arg("--version").output().is_err() {
341        return None;
342    }
343    let path = write_temp(code, ".py")?;
344    let mut errors = vec![];
345
346    // ast.parse — syntax check
347    let out = Command::new("python3")
348        .args([
349            "-c",
350            &format!("import ast; ast.parse(open({:?}).read())", path),
351        ])
352        .output()
353        .ok()?;
354    if !out.status.success() {
355        errors.push(format!(
356            "python syntax: {}",
357            String::from_utf8_lossy(&out.stderr)
358                .lines()
359                .next()
360                .unwrap_or("")
361        ));
362    }
363
364    // ruff check
365    if Command::new("ruff").arg("--version").output().is_ok() {
366        let out = Command::new("ruff")
367            .args([
368                "check",
369                "--select",
370                "E,F,I",
371                "--target-version",
372                "py310",
373                path.to_str()?,
374            ])
375            .output()
376            .ok()?;
377        if !out.status.success() {
378            for line in String::from_utf8_lossy(&out.stdout).lines().take(3) {
379                if !line.trim().is_empty() {
380                    errors.push(format!("ruff: {line}"));
381                }
382            }
383        }
384    }
385
386    let _ = std::fs::remove_file(&path);
387    Some(errors)
388}
389
390fn validate_typescript_tools(code: &str) -> Option<Vec<String>> {
391    if Command::new("biome").arg("--version").output().is_err() {
392        return None;
393    }
394    let path = write_temp(code, ".ts")?;
395    let mut errors = vec![];
396
397    let out = Command::new("biome")
398        .args(["check", "--no-errors-on-unmatched", path.to_str()?])
399        .output()
400        .ok()?;
401    if !out.status.success() {
402        for line in String::from_utf8_lossy(&out.stderr).lines().take(3) {
403            if !line.trim().is_empty() {
404                errors.push(format!("biome: {line}"));
405            }
406        }
407    }
408
409    let _ = std::fs::remove_file(&path);
410    Some(errors)
411}
412
413fn validate_go_tools(code: &str) -> Option<Vec<String>> {
414    if Command::new("gofmt").arg("-h").output().is_err() {
415        return None;
416    }
417    let path = write_temp(code, ".go")?;
418    let mut errors = vec![];
419
420    let out = Command::new("gofmt")
421        .args(["-e", path.to_str()?])
422        .output()
423        .ok()?;
424    if !out.status.success() {
425        for line in String::from_utf8_lossy(&out.stderr).lines().take(3) {
426            if !line.trim().is_empty() {
427                errors.push(format!("gofmt: {line}"));
428            }
429        }
430    }
431
432    let _ = std::fs::remove_file(&path);
433    Some(errors)
434}
435
436fn validate_ruby_tools(code: &str) -> Option<Vec<String>> {
437    if Command::new("ruby").arg("--version").output().is_err() {
438        return None;
439    }
440    let path = write_temp(code, ".rb")?;
441    let mut errors = vec![];
442
443    let out = Command::new("ruby")
444        .args(["-c", path.to_str()?])
445        .output()
446        .ok()?;
447    if !out.status.success() {
448        errors.push(format!(
449            "ruby syntax: {}",
450            String::from_utf8_lossy(&out.stderr)
451                .lines()
452                .next()
453                .unwrap_or("")
454        ));
455    }
456
457    let _ = std::fs::remove_file(&path);
458    Some(errors)
459}
460
461fn validate_php_tools(code: &str) -> Option<Vec<String>> {
462    if Command::new("php").arg("--version").output().is_err() {
463        return None;
464    }
465    let path = write_temp(code, ".php")?;
466    let mut errors = vec![];
467
468    let out = Command::new("php")
469        .args(["-l", path.to_str()?])
470        .output()
471        .ok()?;
472    if !out.status.success() {
473        errors.push(format!(
474            "php syntax: {}",
475            String::from_utf8_lossy(&out.stdout)
476                .lines()
477                .next()
478                .unwrap_or("")
479        ));
480    }
481
482    let _ = std::fs::remove_file(&path);
483    Some(errors)
484}
485
486fn validate_kotlin_tools(code: &str) -> Option<Vec<String>> {
487    if Command::new("ktlint").arg("--version").output().is_err() {
488        return None;
489    }
490    let path = write_temp(code, ".kt")?;
491    let mut errors = vec![];
492
493    let out = Command::new("ktlint")
494        .args(["--log-level=error", path.to_str()?])
495        .output()
496        .ok()?;
497    if !out.status.success() {
498        for line in String::from_utf8_lossy(&out.stdout).lines().take(3) {
499            if !line.trim().is_empty() {
500                errors.push(format!("ktlint: {line}"));
501            }
502        }
503    }
504
505    let _ = std::fs::remove_file(&path);
506    Some(errors)
507}
508
#[cfg(test)]
mod tests {
    use super::*;

    /// Fail the calling test unless `code` passes the structural checks for `backend`.
    fn assert_clean(code: &str, backend: &str) {
        let errors = validate_structural(code, backend);
        assert!(errors.is_empty(), "expected no errors, got: {:?}", errors);
    }

    #[test]
    fn test_unknown_backend() {
        let errors = validate_structural("some code", "unknown-backend");
        match errors.as_slice() {
            [only] => assert!(only.contains("unknown backend")),
            other => panic!("expected exactly one error, got: {:?}", other),
        }
    }

    #[test]
    fn test_rust_backends_skip() {
        // Rust output is not checked structurally, whatever the input.
        for backend in ["rust-sqlx", "rust-tokio-postgres"].iter() {
            assert!(validate_structural("anything", backend).is_empty());
        }
    }

    #[test]
    fn test_python_valid() {
        let code = concat!(
            "from dataclasses import dataclass\n",
            "\n",
            "@dataclass\n",
            "class ListUsersRow:\n",
            "    id: int\n",
            "    name: str\n",
            "\n",
            "async def list_users(conn) -> list[ListUsersRow]:\n",
            "    pass\n",
        );
        assert_clean(code, "python-psycopg3");
    }

    #[test]
    fn test_python_invalid_typing() {
        let code = concat!(
            "from typing import Optional\n",
            "\n",
            "@dataclass\n",
            "class Row:\n",
            "    id: int\n",
            "\n",
            "def query() -> List[Row]:\n",
            "    pass\n",
        );
        let errors = validate_structural(code, "python-asyncpg");
        for fragment in ["Optional", "List["].iter() {
            assert!(errors.iter().any(|e| e.contains(fragment)));
        }
    }

    #[test]
    fn test_typescript_valid() {
        let code = concat!(
            "export interface ListUsersRow {\n",
            "  id: number;\n",
            "  name: string;\n",
            "}\n",
            "\n",
            "export async function listUsers(): Promise<ListUsersRow[]> {\n",
            "  // ...\n",
            "}\n",
        );
        assert_clean(code, "typescript-postgres");
    }

    #[test]
    fn test_go_valid() {
        let code = concat!(
            "package db\n",
            "\n",
            "import (\n",
            "\t\"context\"\n",
            "\t\"encoding/json\"\n",
            ")\n",
            "\n",
            "type ListUsersRow struct {\n",
            "\tID   int    `json:\"id\"`\n",
            "\tName string `json:\"name\"`\n",
            "}\n",
            "\n",
            "func ListUsers(ctx context.Context) ([]ListUsersRow, error) {\n",
            "\treturn nil, nil\n",
            "}\n",
        );
        assert_clean(code, "go-pgx");
    }

    #[test]
    fn test_php_valid() {
        let code = concat!(
            "<?php\n",
            "\n",
            "declare(strict_types=1);\n",
            "\n",
            "readonly class ListUsersRow {\n",
            "    public function __construct(\n",
            "        public int $id,\n",
            "        public string $name,\n",
            "    ) {}\n",
            "}\n",
            "\n",
            "function listUsers($pdo): array {\n",
            "    return [];\n",
            "}\n",
        );
        assert_clean(code, "php-pdo");
    }
}