Skip to main content

scythe_codegen/
validation.rs

1use std::process::Command;
2
3/// Validate generated code structurally for a given backend.
4/// Returns a list of errors (empty = passed).
5pub fn validate_structural(code: &str, backend_name: &str) -> Vec<String> {
6    match backend_name {
7        "python-psycopg3" | "python-asyncpg" => validate_python(code),
8        "typescript-postgres" | "typescript-pg" => validate_typescript(code),
9        "go-pgx" => validate_go(code),
10        "java-jdbc" => validate_java(code),
11        "kotlin-jdbc" => validate_kotlin(code),
12        "csharp-npgsql" => validate_csharp(code),
13        "elixir-postgrex" => validate_elixir(code),
14        "ruby-pg" | "ruby-mysql2" | "ruby-sqlite3" => validate_ruby(code),
15        "php-pdo" => validate_php(code),
16        // Rust backends are validated by syn, not here.
17        "rust-sqlx" | "rust-tokio-postgres" => vec![],
18        _ => vec![format!("unknown backend: {}", backend_name)],
19    }
20}
21
/// Structural checks for generated Python code.
///
/// Looks for a function definition, flags pre-3.10 typing idioms, and reports
/// the first line that starts with a tab. Returns one message per finding.
fn validate_python(code: &str) -> Vec<String> {
    // Pre-3.10 typing idioms, each paired with the message reported for it.
    const LEGACY_TYPING: [(&str, &str); 4] = [
        (
            "from typing import Union",
            "contains `from typing import Union` (pre-3.10 style)",
        ),
        (
            "from typing import Optional",
            "contains `from typing import Optional` (pre-3.10 style)",
        ),
        ("List[", "contains `List[` (use lowercase `list[`)"),
        ("Dict[", "contains `Dict[` (use lowercase `dict[`)"),
    ];

    let defines_function = code.contains("async def ") || code.contains("def ");
    let defines_type = code.contains("@dataclass") || code.contains("class ");

    let mut problems: Vec<String> = Vec::new();

    if code.contains("from __future__ import annotations") {
        problems.push(String::from(
            "unnecessary `from __future__ import annotations` — target is Python 3.10+",
        ));
    }

    // Neither a type nor a function: the output carries nothing useful.
    if !defines_type && !defines_function {
        problems.push(String::from(
            "missing `@dataclass`/`class` and `def ` -- no meaningful output",
        ));
    }

    if !defines_function {
        problems.push(String::from(
            "missing `async def ` or `def ` (for query functions)",
        ));
    }

    problems.extend(
        LEGACY_TYPING
            .iter()
            .filter(|(needle, _)| code.contains(*needle))
            .map(|(_, message)| (*message).to_string()),
    );

    // Only the first tab-indented line is reported.
    if let Some(index) = code.lines().position(|line| line.starts_with('\t')) {
        problems.push(format!(
            "line {} uses tab indentation (should use 4 spaces)",
            index + 1
        ));
    }

    problems
}
73
/// Structural checks for generated TypeScript code.
///
/// An exported function is always required. A DTO declaration
/// (`export interface` / `export type`) is only reported as missing when no
/// exported function exists either, so exec-only output consisting of a bare
/// function passes. The first line that uses the `any` type is reported too.
fn validate_typescript(code: &str) -> Vec<String> {
    let mut errors = Vec::new();

    let has_function = code.contains("export async function") || code.contains("export function");
    let has_dto = code.contains("export interface") || code.contains("export type");

    if !has_dto && !has_function {
        errors.push("missing `export interface` or `export type` (for DTOs)".into());
    }

    if !has_function {
        errors.push("missing `export async function` or `export function`".into());
    }

    // Report only the first offending line; one example is enough.
    if let Some(offender) = code
        .lines()
        .map(str::trim)
        .find(|line| line_uses_any_type(line))
    {
        errors.push(format!(
            "contains `any` type (should use `unknown` or specific): {}",
            offender
        ));
    }

    errors
}

/// Reports whether `line` contains `any` as a standalone word in one of the
/// flagged positions: after `": "`, wrapped as `<any>`, or directly followed
/// by `;`, `,` or `)`.
///
/// Occurrences embedded in a longer identifier (`many`, `company`,
/// `anything`) are ignored; plain substring matching would flag them.
fn line_uses_any_type(line: &str) -> bool {
    const KEYWORD: &str = "any";

    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_' || c == '$'
    }

    line.match_indices(KEYWORD).any(|(start, _)| {
        let before = &line[..start];
        let next = line[start + KEYWORD.len()..].chars().next();

        // Part of a longer identifier on either side: not the `any` type.
        let embedded = before.chars().next_back().map_or(false, is_ident_char)
            || next.map_or(false, is_ident_char);
        if embedded {
            return false;
        }

        before.ends_with(": ")
            || (before.ends_with('<') && next == Some('>'))
            || matches!(next, Some(';') | Some(',') | Some(')'))
    })
}
109
/// Structural checks for generated Go code.
///
/// Messages are produced in a fixed order: struct, function,
/// `context.Context`, indentation, JSON tags.
fn validate_go(code: &str) -> Vec<String> {
    let declares_func = code.contains("func ");
    let declares_struct = code.contains("type ") && code.contains("struct {");

    // A non-blank line opening with four spaces counts as space indentation.
    // Such a line is itself an indented line, so no separate "is anything
    // indented at all" pre-check is needed.
    let space_indented = code
        .lines()
        .any(|line| line.starts_with("    ") && !line.trim().is_empty());

    // (condition that signals a problem, message to report)
    let checks = [
        // A struct is optional when a function exists (exec-only queries).
        (
            !declares_struct && !declares_func,
            "missing `type ... struct {` (for structs)",
        ),
        (!declares_func, "missing `func ` (for functions)"),
        (
            !code.contains("context.Context"),
            "missing `context.Context` as first param",
        ),
        (
            space_indented,
            "uses space indentation (Go standard is tabs)",
        ),
        // JSON tags only matter once a struct is present.
        (
            declares_struct && !code.contains("json:\""),
            "missing `json:\"` tags on struct fields",
        ),
    ];

    checks
        .iter()
        .filter(|(failed, _)| *failed)
        .map(|(_, message)| (*message).to_string())
        .collect()
}
150
/// Structural checks for generated Java code.
///
/// A `public static` method is always required. A `public record` is only
/// reported as missing when there is no such method either. `throws
/// SQLException` and a `try (` block are required unconditionally.
fn validate_java(code: &str) -> Vec<String> {
    let mut findings: Vec<String> = Vec::new();

    if !code.contains("public static ") {
        // No query method: a record is then the only acceptable content.
        if !code.contains("public record ") {
            findings.push(String::from("missing `public record ` (for DTOs)"));
        }
        findings.push(String::from(
            "missing `public static ` (for query methods)",
        ));
    }

    for (needle, message) in [
        ("throws SQLException", "missing `throws SQLException`"),
        ("try (", "missing `try (` (try-with-resources)"),
    ] {
        if !code.contains(needle) {
            findings.push(message.to_string());
        }
    }

    findings
}
176
/// Structural checks for generated Kotlin code.
///
/// A `fun ` declaration and a `.use {` block are required. A `data class` is
/// only reported as missing when no function is present either.
fn validate_kotlin(code: &str) -> Vec<String> {
    let lacks = |needle: &str| !code.contains(needle);
    let no_function = lacks("fun ");

    let mut report: Vec<String> = Vec::new();

    // A data class is optional as long as a function exists.
    if no_function && lacks("data class ") {
        report.push("missing `data class ` (for DTOs)".to_owned());
    }
    if no_function {
        report.push("missing `fun ` (for functions)".to_owned());
    }
    if lacks(".use {") {
        report.push("missing `.use {` (resource management)".to_owned());
    }

    report
}
197
/// Structural checks for generated C# code.
///
/// An `async Task` method and an `await ` are required. A `public record` is
/// only reported as missing when no async method is present either.
fn validate_csharp(code: &str) -> Vec<String> {
    let has_async_method = ["async Task<", "async Task "]
        .iter()
        .any(|signature| code.contains(*signature));
    let has_record = code.contains("public record ");

    // (requirement satisfied?, message reported when it is not)
    let requirements = [
        (
            has_record || has_async_method,
            "missing `public record ` (for DTOs)",
        ),
        (
            has_async_method,
            "missing `async Task<` or `async Task` (for async methods)",
        ),
        (code.contains("await "), "missing `await `"),
    ];

    requirements
        .iter()
        .filter(|(satisfied, _)| !*satisfied)
        .map(|(_, message)| (*message).to_string())
        .collect()
}
218
/// Structural checks for generated Elixir code.
///
/// A `def `/`defp ` function and a typespec (`@type ` or `@spec `) are
/// required. `defmodule ` and `defstruct` are only reported as missing when
/// no function is present either.
fn validate_elixir(code: &str) -> Vec<String> {
    let defines_function = code.contains("def ") || code.contains("defp ");
    let has_typespec = code.contains("@type ") || code.contains("@spec ");

    let mut messages: Vec<String> = Vec::new();

    if !defines_function {
        // Without a function, the module and struct markers become mandatory.
        for (marker, complaint) in [
            ("defmodule ", "missing `defmodule ` (for modules)"),
            ("defstruct", "missing `defstruct` (for structs)"),
        ] {
            if !code.contains(marker) {
                messages.push(complaint.to_string());
            }
        }
        messages.push("missing `def ` or `defp ` (for functions)".to_string());
    }

    if !has_typespec {
        messages.push("missing `@type ` or `@spec ` (for typespecs)".to_string());
    }

    messages
}
245
/// Structural checks for generated Ruby code.
///
/// A `def self.` method, the frozen-string-literal magic comment and a
/// `module Queries` wrapper are required. `Data.define` is only reported as
/// missing when no module method is present either.
fn validate_ruby(code: &str) -> Vec<String> {
    let has_module_method = code.contains("def self.");
    let has_data_class = code.contains("Data.define");

    // (requirement satisfied?, message reported when it is not)
    let requirements = [
        (
            has_data_class || has_module_method,
            "missing `Data.define` (for DTOs)",
        ),
        (
            has_module_method,
            "missing `def self.` (for module methods)",
        ),
        (
            code.contains("# frozen_string_literal: true"),
            "missing `# frozen_string_literal: true`",
        ),
        (
            code.contains("module Queries"),
            "missing `module Queries` wrapper",
        ),
    ];

    requirements
        .iter()
        .filter(|(satisfied, _)| !*satisfied)
        .map(|(_, message)| (*message).to_string())
        .collect()
}
270
/// Structural checks for generated PHP code.
///
/// A `function `, `declare(strict_types=1)` and the `<?php` opening tag are
/// required. A `readonly class ` is only reported as missing when no function
/// is present either.
fn validate_php(code: &str) -> Vec<String> {
    let mut findings: Vec<String> = Vec::new();

    if !code.contains("function ") {
        // No function: a readonly class is then the only acceptable content.
        if !code.contains("readonly class ") {
            findings.push(String::from("missing `readonly class ` (for DTOs)"));
        }
        findings.push(String::from("missing `function ` (for query functions)"));
    }

    for (needle, message) in [
        ("declare(strict_types=1)", "missing `declare(strict_types=1)`"),
        ("<?php", "missing `<?php`"),
    ] {
        if !code.contains(needle) {
            findings.push(message.to_string());
        }
    }

    findings
}
295
296/// Validate generated code using real language tools (if available).
297/// Returns None if the tool is not installed, Some(errors) otherwise.
298pub fn validate_with_tools(code: &str, backend_name: &str) -> Option<Vec<String>> {
299    match backend_name {
300        name if name.starts_with("python") => validate_python_tools(code),
301        name if name.starts_with("typescript") => validate_typescript_tools(code),
302        name if name.starts_with("go") => validate_go_tools(code),
303        name if name.starts_with("ruby") => validate_ruby_tools(code),
304        name if name.starts_with("php") => validate_php_tools(code),
305        name if name.starts_with("kotlin") => validate_kotlin_tools(code),
306        _ => None,
307    }
308}
309
/// Write `code` to a fresh file in the system temp directory and return its
/// path.
///
/// `ext` is appended verbatim to the generated base name, so it must include
/// the leading dot (for example `".py"`). Trailing whitespace in `code` is
/// replaced by exactly one newline. Returns `None` if the file cannot be
/// written. The caller is responsible for deleting the file.
fn write_temp(code: &str, ext: &str) -> Option<std::path::PathBuf> {
    use std::sync::atomic::{AtomicU64, Ordering};
    static COUNTER: AtomicU64 = AtomicU64::new(0);
    let n = COUNTER.fetch_add(1, Ordering::Relaxed);

    // The counter restarts at 0 in every process. Adding the process id keeps
    // concurrently running processes from overwriting or deleting each
    // other's files in the shared temp directory.
    let pid = std::process::id();
    let basename = if ext == ".kt" {
        // Kotlin files keep a PascalCase name without underscores,
        // presumably to satisfy ktlint's file-naming rule. TODO confirm.
        format!("ScytheValidate{pid}N{n}")
    } else {
        format!("scythe_validate_{pid}_{n}")
    };
    let path = std::env::temp_dir().join(format!("{basename}{ext}"));

    // Normalise the file ending to a single newline so tools do not complain
    // about trailing blank lines.
    let trimmed = format!("{}\n", code.trim_end());
    std::fs::write(&path, trimmed).ok()?;
    Some(path)
}
325
326fn validate_python_tools(code: &str) -> Option<Vec<String>> {
327    if Command::new("python3").arg("--version").output().is_err() {
328        return None;
329    }
330    let path = write_temp(code, ".py")?;
331    let mut errors = vec![];
332
333    // ast.parse — syntax check
334    let out = Command::new("python3")
335        .args([
336            "-c",
337            &format!("import ast; ast.parse(open({:?}).read())", path),
338        ])
339        .output()
340        .ok()?;
341    if !out.status.success() {
342        errors.push(format!(
343            "python syntax: {}",
344            String::from_utf8_lossy(&out.stderr)
345                .lines()
346                .next()
347                .unwrap_or("")
348        ));
349    }
350
351    // ruff check
352    if Command::new("ruff").arg("--version").output().is_ok() {
353        let out = Command::new("ruff")
354            .args([
355                "check",
356                "--select",
357                "E,F,I",
358                "--target-version",
359                "py310",
360                path.to_str()?,
361            ])
362            .output()
363            .ok()?;
364        if !out.status.success() {
365            for line in String::from_utf8_lossy(&out.stdout).lines().take(3) {
366                if !line.trim().is_empty() {
367                    errors.push(format!("ruff: {line}"));
368                }
369            }
370        }
371    }
372
373    let _ = std::fs::remove_file(&path);
374    Some(errors)
375}
376
377fn validate_typescript_tools(code: &str) -> Option<Vec<String>> {
378    if Command::new("biome").arg("--version").output().is_err() {
379        return None;
380    }
381    let path = write_temp(code, ".ts")?;
382    let mut errors = vec![];
383
384    let out = Command::new("biome")
385        .args(["check", "--no-errors-on-unmatched", path.to_str()?])
386        .output()
387        .ok()?;
388    if !out.status.success() {
389        for line in String::from_utf8_lossy(&out.stderr).lines().take(3) {
390            if !line.trim().is_empty() {
391                errors.push(format!("biome: {line}"));
392            }
393        }
394    }
395
396    let _ = std::fs::remove_file(&path);
397    Some(errors)
398}
399
400fn validate_go_tools(code: &str) -> Option<Vec<String>> {
401    if Command::new("gofmt").arg("-h").output().is_err() {
402        return None;
403    }
404    let path = write_temp(code, ".go")?;
405    let mut errors = vec![];
406
407    let out = Command::new("gofmt")
408        .args(["-e", path.to_str()?])
409        .output()
410        .ok()?;
411    if !out.status.success() {
412        for line in String::from_utf8_lossy(&out.stderr).lines().take(3) {
413            if !line.trim().is_empty() {
414                errors.push(format!("gofmt: {line}"));
415            }
416        }
417    }
418
419    let _ = std::fs::remove_file(&path);
420    Some(errors)
421}
422
423fn validate_ruby_tools(code: &str) -> Option<Vec<String>> {
424    if Command::new("ruby").arg("--version").output().is_err() {
425        return None;
426    }
427    let path = write_temp(code, ".rb")?;
428    let mut errors = vec![];
429
430    let out = Command::new("ruby")
431        .args(["-c", path.to_str()?])
432        .output()
433        .ok()?;
434    if !out.status.success() {
435        errors.push(format!(
436            "ruby syntax: {}",
437            String::from_utf8_lossy(&out.stderr)
438                .lines()
439                .next()
440                .unwrap_or("")
441        ));
442    }
443
444    let _ = std::fs::remove_file(&path);
445    Some(errors)
446}
447
448fn validate_php_tools(code: &str) -> Option<Vec<String>> {
449    if Command::new("php").arg("--version").output().is_err() {
450        return None;
451    }
452    let path = write_temp(code, ".php")?;
453    let mut errors = vec![];
454
455    let out = Command::new("php")
456        .args(["-l", path.to_str()?])
457        .output()
458        .ok()?;
459    if !out.status.success() {
460        errors.push(format!(
461            "php syntax: {}",
462            String::from_utf8_lossy(&out.stdout)
463                .lines()
464                .next()
465                .unwrap_or("")
466        ));
467    }
468
469    let _ = std::fs::remove_file(&path);
470    Some(errors)
471}
472
473fn validate_kotlin_tools(code: &str) -> Option<Vec<String>> {
474    if Command::new("ktlint").arg("--version").output().is_err() {
475        return None;
476    }
477    let path = write_temp(code, ".kt")?;
478    let mut errors = vec![];
479
480    let out = Command::new("ktlint")
481        .args(["--log-level=error", path.to_str()?])
482        .output()
483        .ok()?;
484    if !out.status.success() {
485        for line in String::from_utf8_lossy(&out.stdout).lines().take(3) {
486            if !line.trim().is_empty() {
487                errors.push(format!("ktlint: {line}"));
488            }
489        }
490    }
491
492    let _ = std::fs::remove_file(&path);
493    Some(errors)
494}
495
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `code` passes the structural checks of `backend`.
    fn assert_passes(backend: &str, code: &str) {
        let errors = validate_structural(code, backend);
        assert!(errors.is_empty(), "expected no errors, got: {:?}", errors);
    }

    #[test]
    fn test_unknown_backend() {
        let reported = validate_structural("some code", "unknown-backend");
        assert_eq!(reported.len(), 1);
        assert!(reported[0].contains("unknown backend"));
    }

    #[test]
    fn test_rust_backends_skip() {
        for backend in ["rust-sqlx", "rust-tokio-postgres"] {
            assert!(validate_structural("anything", backend).is_empty());
        }
    }

    #[test]
    fn test_python_valid() {
        assert_passes(
            "python-psycopg3",
            r#"from dataclasses import dataclass

@dataclass
class ListUsersRow:
    id: int
    name: str

async def list_users(conn) -> list[ListUsersRow]:
    pass
"#,
        );
    }

    #[test]
    fn test_python_invalid_typing() {
        let source = r#"from typing import Optional

@dataclass
class Row:
    id: int

def query() -> List[Row]:
    pass
"#;
        let reported = validate_structural(source, "python-asyncpg");
        // Both legacy typing idioms must be reported.
        for fragment in ["Optional", "List["] {
            assert!(reported.iter().any(|message| message.contains(fragment)));
        }
    }

    #[test]
    fn test_typescript_valid() {
        assert_passes(
            "typescript-postgres",
            r#"export interface ListUsersRow {
  id: number;
  name: string;
}

export async function listUsers(): Promise<ListUsersRow[]> {
  // ...
}
"#,
        );
    }

    #[test]
    fn test_go_valid() {
        // Built line by line so the tab indentation stays visible.
        let source = concat!(
            "package db\n",
            "\n",
            "import (\n",
            "\t\"context\"\n",
            "\t\"encoding/json\"\n",
            ")\n",
            "\n",
            "type ListUsersRow struct {\n",
            "\tID   int    `json:\"id\"`\n",
            "\tName string `json:\"name\"`\n",
            "}\n",
            "\n",
            "func ListUsers(ctx context.Context) ([]ListUsersRow, error) {\n",
            "\treturn nil, nil\n",
            "}\n",
        );
        assert_passes("go-pgx", source);
    }

    #[test]
    fn test_php_valid() {
        assert_passes(
            "php-pdo",
            r#"<?php

declare(strict_types=1);

readonly class ListUsersRow {
    public function __construct(
        public int $id,
        public string $name,
    ) {}
}

function listUsers($pdo): array {
    return [];
}
"#,
        );
    }
}