Skip to main content

scythe_codegen/
validation.rs

1use std::process::Command;
2
3/// Validate generated code structurally for a given backend.
4/// Returns a list of errors (empty = passed).
5pub fn validate_structural(code: &str, backend_name: &str) -> Vec<String> {
6    match backend_name {
7        "python-psycopg3" | "python-asyncpg" | "python-aiomysql" | "python-aiosqlite"
8        | "python-duckdb" => validate_python(code),
9        "typescript-postgres"
10        | "typescript-pg"
11        | "typescript-mysql2"
12        | "typescript-better-sqlite3"
13        | "typescript-duckdb" => validate_typescript(code),
14        "go-pgx" | "go-database-sql" => validate_go(code),
15        "java-jdbc" => validate_java(code),
16        "java-r2dbc" => validate_java_r2dbc(code),
17        "kotlin-exposed" => validate_kotlin_exposed(code),
18        "kotlin-jdbc" => validate_kotlin(code),
19        "kotlin-r2dbc" => validate_kotlin_r2dbc(code),
20        "csharp-npgsql" => validate_csharp(code),
21        "elixir-postgrex" | "elixir-ecto" => validate_elixir(code),
22        "ruby-pg" | "ruby-mysql2" | "ruby-sqlite3" | "ruby-trilogy" => validate_ruby(code),
23        "php-pdo" | "php-amphp" => validate_php(code),
24        // Rust backends are validated by syn, not here.
25        "rust-sqlx" | "rust-tokio-postgres" => vec![],
26        _ => vec![format!("unknown backend: {}", backend_name)],
27    }
28}
29
/// Structural checks for generated Python (target: Python 3.10+).
///
/// Requires a class-like definition or at least a function, rejects
/// pre-3.10 typing constructs, and reports the first tab-indented line.
/// Messages come back in the order the checks are listed below.
fn validate_python(code: &str) -> Vec<String> {
    let found = |needle: &str| code.contains(needle);

    let has_function = found("async def ") || found("def ");
    let has_struct = ["@dataclass", "(BaseModel)", "(msgspec.Struct)", "class "]
        .iter()
        .any(|marker| found(*marker));

    // Each entry pairs "this check failed" with the message to report.
    let checks = [
        (
            found("from __future__ import annotations"),
            "unnecessary `from __future__ import annotations` — target is Python 3.10+",
        ),
        (
            // Neither a struct nor a function: nothing useful was generated.
            !has_struct && !has_function,
            "missing `@dataclass`/`class` and `def ` -- no meaningful output",
        ),
        (
            !has_function,
            "missing `async def ` or `def ` (for query functions)",
        ),
        (
            found("from typing import Union"),
            "contains `from typing import Union` (pre-3.10 style)",
        ),
        (
            found("from typing import Optional"),
            "contains `from typing import Optional` (pre-3.10 style)",
        ),
        (found("List["), "contains `List[` (use lowercase `list[`)"),
        (found("Dict["), "contains `Dict[` (use lowercase `dict[`)"),
    ];

    let mut errors: Vec<String> = checks
        .iter()
        .filter(|check| check.0)
        .map(|check| check.1.to_string())
        .collect();

    // Indentation must be spaces; only the first offending line is reported.
    if let Some(index) = code.lines().position(|line| line.starts_with('\t')) {
        errors.push(format!(
            "line {} uses tab indentation (should use 4 spaces)",
            index + 1
        ));
    }

    errors
}
84
/// Structural checks for generated TypeScript.
///
/// Requires an exported function, requires a DTO declaration (interface,
/// type alias or zod schema) only when no function is present, and rejects
/// the `any` type. At most one `any` finding is reported, quoting the first
/// offending line with surrounding whitespace trimmed.
fn validate_typescript(code: &str) -> Vec<String> {
    let mut errors = Vec::new();

    let has_function = code.contains("export async function") || code.contains("export function");

    // Structs are only required when the code is NOT exec-only (i.e. when
    // there is something beyond a bare function).
    let has_zod = code.contains("z.object(") || code.contains("z.infer<");
    if !code.contains("export interface")
        && !code.contains("export type")
        && !has_zod
        && !has_function
    {
        errors.push("missing `export interface` or `export type` (for DTOs)".into());
    }

    if !has_function {
        errors.push("missing `export async function` or `export function`".into());
    }

    // Only a standalone `any` counts; identifiers such as `many` or
    // `company` that merely end in those letters are ignored.
    if let Some(offender) = code
        .lines()
        .map(str::trim)
        .find(|line| uses_any_type(line))
    {
        errors.push(format!(
            "contains `any` type (should use `unknown` or specific): {}",
            offender
        ));
    }

    errors
}

/// Report whether `line` uses `any` as a standalone word in a type-like
/// position: after `: `, between `<` and `>`, or directly before `;`, `,`
/// or `)`.
fn uses_any_type(line: &str) -> bool {
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_' || c == '$'
    }

    line.match_indices("any").any(|(start, word)| {
        let before = &line[..start];
        let after = &line[start + word.len()..];

        // Reject `any` embedded in a longer identifier (`many`, `anyOf`, ...).
        let standalone = !before.chars().next_back().map_or(false, is_ident_char)
            && !after.chars().next().map_or(false, is_ident_char);

        standalone
            && (before.ends_with(": ")
                || (before.ends_with('<') && after.starts_with('>'))
                || matches!(after.chars().next(), Some(';') | Some(',') | Some(')')))
    })
}
125
/// Structural checks for generated Go.
///
/// Requires a function (and reports the missing struct only when there is
/// no function either), a `context.Context` mention, no four-space
/// indentation, and `json:"` tags whenever a struct is present.
fn validate_go(code: &str) -> Vec<String> {
    let has_func = code.contains("func ");
    let has_struct = code.contains("type ") && code.contains("struct {");

    // Go uses tabs for indentation: any non-blank line that begins with
    // four spaces counts as space-indented.
    let space_indented = code
        .lines()
        .any(|line| line.starts_with("    ") && !line.trim().is_empty());

    let mut errors = Vec::new();
    let mut report = |failed: bool, message: &str| {
        if failed {
            errors.push(message.to_string());
        }
    };

    // Exec-only queries produce just a function, so a struct is demanded
    // only when there is no function at all.
    report(
        !(has_struct || has_func),
        "missing `type ... struct {` (for structs)",
    );
    report(!has_func, "missing `func ` (for functions)");
    report(
        !code.contains("context.Context"),
        "missing `context.Context` as first param",
    );
    report(
        space_indented,
        "uses space indentation (Go standard is tabs)",
    );
    // json tags only matter when a struct was generated.
    report(
        has_struct && !code.contains("json:\""),
        "missing `json:\"` tags on struct fields",
    );

    errors
}
166
/// Structural checks for generated Java (JDBC).
///
/// Requires a `public static` method (a missing record is reported only
/// when there is no such method either), a `throws SQLException` clause
/// and a try-with-resources block.
fn validate_java(code: &str) -> Vec<String> {
    let has_static = code.contains("public static ");
    let has_record = code.contains("public record ");

    // (check failed, message) pairs, in reporting order.
    let findings = [
        (
            // Exec-only queries produce just a method, so the record is
            // demanded only when no method exists.
            !has_record && !has_static,
            "missing `public record ` (for DTOs)",
        ),
        (
            !has_static,
            "missing `public static ` (for query methods)",
        ),
        (
            !code.contains("throws SQLException"),
            "missing `throws SQLException`",
        ),
        (
            !code.contains("try ("),
            "missing `try (` (try-with-resources)",
        ),
    ];

    findings
        .iter()
        .filter(|finding| finding.0)
        .map(|finding| String::from(finding.1))
        .collect()
}
192
/// Structural checks for generated Kotlin (JDBC).
///
/// Requires a function (a missing data class is reported only when there
/// is no function either) and a `.use {` block for resource management.
fn validate_kotlin(code: &str) -> Vec<String> {
    let mut errors: Vec<String> = Vec::with_capacity(3);

    if !code.contains("fun ") {
        // With no function at all, the absent data class is reported first.
        if !code.contains("data class ") {
            errors.push(String::from("missing `data class ` (for DTOs)"));
        }
        errors.push(String::from("missing `fun ` (for functions)"));
    }

    if !code.contains(".use {") {
        errors.push(String::from("missing `.use {` (resource management)"));
    }

    errors
}
213
/// Structural checks for generated Kotlin using Exposed.
///
/// Requires a function (a missing `data class`/`object` is reported only
/// when there is no function either) and a `transaction {` block.
fn validate_kotlin_exposed(code: &str) -> Vec<String> {
    let has = |needle: &str| code.contains(needle);

    let has_fun = has("fun ");
    let has_model = has("data class ") || has("object ");

    let mut errors = Vec::new();
    let mut require = |present: bool, message: &str| {
        if !present {
            errors.push(message.to_owned());
        }
    };

    // A function on its own is enough to waive the DTO/Table requirement.
    require(
        has_model || has_fun,
        "missing `data class ` or `object ` (for DTOs/Tables)",
    );
    require(has_fun, "missing `fun ` (for functions)");
    require(
        has("transaction {"),
        "missing `transaction {` (Exposed transaction block)",
    );

    errors
}
234
/// Structural checks for generated Java using R2DBC.
///
/// Requires a `public static` method (a missing record is reported only
/// when there is no such method either), a `Mono<`/`Flux<` reactive type
/// and a `ConnectionFactory` mention.
fn validate_java_r2dbc(code: &str) -> Vec<String> {
    let mut errors = Vec::new();

    if !code.contains("public static ") {
        // With no method at all, the absent record is reported first.
        if !code.contains("public record ") {
            errors.push("missing `public record ` (for DTOs)".to_string());
        }
        errors.push("missing `public static ` (for query methods)".to_string());
    }

    let is_reactive = ["Mono<", "Flux<"].iter().any(|ty| code.contains(*ty));
    if !is_reactive {
        errors.push("missing `Mono<` or `Flux<` (reactive types)".to_string());
    }

    if !code.contains("ConnectionFactory") {
        errors.push("missing `ConnectionFactory` parameter".to_string());
    }

    errors
}
258
/// Structural checks for generated Kotlin using R2DBC.
///
/// Requires a function (a missing data class is reported only when there
/// is no function either), a `ConnectionFactory` mention, and either a
/// `suspend fun` or a `Flow<` type.
fn validate_kotlin_r2dbc(code: &str) -> Vec<String> {
    let has_fun = code.contains("fun ");
    let has_data_class = code.contains("data class ");
    // Either coroutine style is accepted: suspend fun or Flow.
    let is_async = code.contains("suspend fun") || code.contains("Flow<");

    // (check failed, message) pairs, in reporting order.
    let findings = [
        (
            !has_data_class && !has_fun,
            "missing `data class ` (for DTOs)",
        ),
        (!has_fun, "missing `fun ` (for functions)"),
        (
            !code.contains("ConnectionFactory"),
            "missing `ConnectionFactory` parameter",
        ),
        (
            !is_async,
            "missing `suspend fun` or `Flow<` (coroutine/reactive types)",
        ),
    ];

    findings
        .iter()
        .filter(|finding| finding.0)
        .map(|finding| finding.1.to_string())
        .collect()
}
283
/// Structural checks for generated C#.
///
/// Requires an `async Task` method (a missing record is reported only when
/// there is no such method either) and at least one `await `.
fn validate_csharp(code: &str) -> Vec<String> {
    let has_record = code.contains("public record ");
    let has_async = ["async Task<", "async Task "]
        .iter()
        .any(|signature| code.contains(*signature));

    let mut errors: Vec<String> = Vec::new();

    match (has_record, has_async) {
        // An async method is present: no record is demanded.
        (_, true) => {}
        (true, false) => {
            errors.push("missing `async Task<` or `async Task` (for async methods)".to_string());
        }
        (false, false) => {
            errors.push("missing `public record ` (for DTOs)".to_string());
            errors.push("missing `async Task<` or `async Task` (for async methods)".to_string());
        }
    }

    if !code.contains("await ") {
        errors.push("missing `await `".to_string());
    }

    errors
}
304
/// Structural checks for generated Elixir.
///
/// Requires a `def `/`defp ` function (a missing `defmodule `/`defstruct`
/// is reported only when there is no function either) and a typespec
/// (`@type ` or `@spec `).
fn validate_elixir(code: &str) -> Vec<String> {
    let has = |needle: &str| code.contains(needle);
    let has_def = has("def ") || has("defp ");

    let mut errors = Vec::new();
    let mut require = |present: bool, message: &str| {
        if !present {
            errors.push(message.to_owned());
        }
    };

    // Exec-only queries produce just a function, so module and struct are
    // demanded only when no function exists.
    require(
        has("defmodule ") || has_def,
        "missing `defmodule ` (for modules)",
    );
    require(
        has("defstruct") || has_def,
        "missing `defstruct` (for structs)",
    );
    require(has_def, "missing `def ` or `defp ` (for functions)");
    require(
        has("@type ") || has("@spec "),
        "missing `@type ` or `@spec ` (for typespecs)",
    );

    errors
}
331
/// Structural checks for generated Ruby.
///
/// Requires a `def self.` module method (a missing `Data.define` is
/// reported only when there is no such method either), the
/// frozen-string-literal magic comment and a `module Queries` wrapper.
fn validate_ruby(code: &str) -> Vec<String> {
    let has_method = code.contains("def self.");
    let has_data = code.contains("Data.define");

    // (check failed, message) pairs, in reporting order.
    let findings = [
        (
            !has_data && !has_method,
            "missing `Data.define` (for DTOs)",
        ),
        (
            !has_method,
            "missing `def self.` (for module methods)",
        ),
        (
            !code.contains("# frozen_string_literal: true"),
            "missing `# frozen_string_literal: true`",
        ),
        (
            !code.contains("module Queries"),
            "missing `module Queries` wrapper",
        ),
    ];

    findings
        .iter()
        .filter(|finding| finding.0)
        .map(|finding| String::from(finding.1))
        .collect()
}
356
/// Structural checks for generated PHP.
///
/// Requires a `function ` (a missing `readonly class ` is reported only
/// when there is no function either), `declare(strict_types=1)` and the
/// `<?php` opening tag.
fn validate_php(code: &str) -> Vec<String> {
    let mut errors: Vec<String> = Vec::with_capacity(4);

    if !code.contains("function ") {
        // With no function at all, the absent DTO class is reported first.
        if !code.contains("readonly class ") {
            errors.push(String::from("missing `readonly class ` (for DTOs)"));
        }
        errors.push(String::from("missing `function ` (for query functions)"));
    }

    // File-level requirements, reported in this fixed order.
    let required = [
        ("declare(strict_types=1)", "missing `declare(strict_types=1)`"),
        ("<?php", "missing `<?php`"),
    ];
    for entry in required.iter() {
        if !code.contains(entry.0) {
            errors.push(String::from(entry.1));
        }
    }

    errors
}
381
382/// Validate generated code using real language tools (if available).
383/// Returns None if the tool is not installed, Some(errors) otherwise.
384pub fn validate_with_tools(code: &str, backend_name: &str) -> Option<Vec<String>> {
385    match backend_name {
386        name if name.starts_with("python") => validate_python_tools(code),
387        name if name.starts_with("typescript") => validate_typescript_tools(code),
388        name if name.starts_with("go") => validate_go_tools(code),
389        name if name.starts_with("ruby") => validate_ruby_tools(code),
390        name if name.starts_with("php") => validate_php_tools(code),
391        name if name.starts_with("kotlin") => validate_kotlin_tools(code),
392        _ => None,
393    }
394}
395
/// Write `code` to a fresh file in the system temp directory and return
/// its path, or `None` if the file cannot be written.
///
/// `ext` is the file extension including the leading dot (e.g. `".py"`).
/// The file name combines the current process id with a per-process
/// counter, so neither repeated calls in one process nor concurrent
/// processes sharing the temp directory reuse a name. The caller is
/// responsible for deleting the file.
fn write_temp(code: &str, ext: &str) -> Option<std::path::PathBuf> {
    use std::sync::atomic::{AtomicU64, Ordering};
    static COUNTER: AtomicU64 = AtomicU64::new(0);

    let n = COUNTER.fetch_add(1, Ordering::Relaxed);
    // The counter restarts at 0 in every process; the pid keeps parallel
    // processes from overwriting or deleting each other's files.
    let pid = std::process::id();

    // Kotlin files get a PascalCase, punctuation-free name — presumably to
    // satisfy ktlint's file-naming rule (TODO confirm).
    let basename = if ext == ".kt" {
        format!("ScytheValidate{pid}N{n}")
    } else {
        format!("scythe_validate_{pid}_{n}")
    };
    let path = std::env::temp_dir().join(format!("{basename}{ext}"));

    // Trim trailing whitespace/newlines to avoid tool complaints about blank lines at EOF
    let contents = format!("{}\n", code.trim_end());
    std::fs::write(&path, contents).ok()?;
    Some(path)
}
411
412fn validate_python_tools(code: &str) -> Option<Vec<String>> {
413    if Command::new("python3").arg("--version").output().is_err() {
414        return None;
415    }
416    let path = write_temp(code, ".py")?;
417    let mut errors = vec![];
418
419    // ast.parse — syntax check
420    let out = Command::new("python3")
421        .args([
422            "-c",
423            &format!("import ast; ast.parse(open({:?}).read())", path),
424        ])
425        .output()
426        .ok()?;
427    if !out.status.success() {
428        errors.push(format!(
429            "python syntax: {}",
430            String::from_utf8_lossy(&out.stderr)
431                .lines()
432                .next()
433                .unwrap_or("")
434        ));
435    }
436
437    // ruff check
438    if Command::new("ruff").arg("--version").output().is_ok() {
439        let out = Command::new("ruff")
440            .args([
441                "check",
442                "--select",
443                "E,F,I",
444                "--target-version",
445                "py310",
446                path.to_str()?,
447            ])
448            .output()
449            .ok()?;
450        if !out.status.success() {
451            for line in String::from_utf8_lossy(&out.stdout).lines().take(3) {
452                if !line.trim().is_empty() {
453                    errors.push(format!("ruff: {line}"));
454                }
455            }
456        }
457    }
458
459    let _ = std::fs::remove_file(&path);
460    Some(errors)
461}
462
463fn validate_typescript_tools(code: &str) -> Option<Vec<String>> {
464    if Command::new("biome").arg("--version").output().is_err() {
465        return None;
466    }
467    let path = write_temp(code, ".ts")?;
468    let mut errors = vec![];
469
470    let out = Command::new("biome")
471        .args(["check", "--no-errors-on-unmatched", path.to_str()?])
472        .output()
473        .ok()?;
474    if !out.status.success() {
475        for line in String::from_utf8_lossy(&out.stderr).lines().take(3) {
476            if !line.trim().is_empty() {
477                errors.push(format!("biome: {line}"));
478            }
479        }
480    }
481
482    let _ = std::fs::remove_file(&path);
483    Some(errors)
484}
485
486fn validate_go_tools(code: &str) -> Option<Vec<String>> {
487    if Command::new("gofmt").arg("-h").output().is_err() {
488        return None;
489    }
490    let path = write_temp(code, ".go")?;
491    let mut errors = vec![];
492
493    let out = Command::new("gofmt")
494        .args(["-e", path.to_str()?])
495        .output()
496        .ok()?;
497    if !out.status.success() {
498        for line in String::from_utf8_lossy(&out.stderr).lines().take(3) {
499            if !line.trim().is_empty() {
500                errors.push(format!("gofmt: {line}"));
501            }
502        }
503    }
504
505    let _ = std::fs::remove_file(&path);
506    Some(errors)
507}
508
509fn validate_ruby_tools(code: &str) -> Option<Vec<String>> {
510    if Command::new("ruby").arg("--version").output().is_err() {
511        return None;
512    }
513    let path = write_temp(code, ".rb")?;
514    let mut errors = vec![];
515
516    let out = Command::new("ruby")
517        .args(["-c", path.to_str()?])
518        .output()
519        .ok()?;
520    if !out.status.success() {
521        errors.push(format!(
522            "ruby syntax: {}",
523            String::from_utf8_lossy(&out.stderr)
524                .lines()
525                .next()
526                .unwrap_or("")
527        ));
528    }
529
530    let _ = std::fs::remove_file(&path);
531    Some(errors)
532}
533
534fn validate_php_tools(code: &str) -> Option<Vec<String>> {
535    if Command::new("php").arg("--version").output().is_err() {
536        return None;
537    }
538    let path = write_temp(code, ".php")?;
539    let mut errors = vec![];
540
541    let out = Command::new("php")
542        .args(["-l", path.to_str()?])
543        .output()
544        .ok()?;
545    if !out.status.success() {
546        errors.push(format!(
547            "php syntax: {}",
548            String::from_utf8_lossy(&out.stdout)
549                .lines()
550                .next()
551                .unwrap_or("")
552        ));
553    }
554
555    let _ = std::fs::remove_file(&path);
556    Some(errors)
557}
558
559fn validate_kotlin_tools(code: &str) -> Option<Vec<String>> {
560    if Command::new("ktlint").arg("--version").output().is_err() {
561        return None;
562    }
563    let path = write_temp(code, ".kt")?;
564    let mut errors = vec![];
565
566    let out = Command::new("ktlint")
567        .args(["--log-level=error", path.to_str()?])
568        .output()
569        .ok()?;
570    if !out.status.success() {
571        for line in String::from_utf8_lossy(&out.stdout).lines().take(3) {
572            if !line.trim().is_empty() {
573                errors.push(format!("ktlint: {line}"));
574            }
575        }
576    }
577
578    let _ = std::fs::remove_file(&path);
579    Some(errors)
580}
581
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that the structural checks for `backend` report nothing for `code`.
    fn assert_passes(code: &str, backend: &str) {
        let errors = validate_structural(code, backend);
        assert!(errors.is_empty(), "expected no errors, got: {:?}", errors);
    }

    // A backend name that is not listed yields exactly one message.
    #[test]
    fn test_unknown_backend() {
        let reported = validate_structural("some code", "unknown-backend");
        assert_eq!(reported.len(), 1);
        assert!(reported.iter().all(|msg| msg.contains("unknown backend")));
    }

    // Rust backends are never checked structurally, whatever the input.
    #[test]
    fn test_rust_backends_skip() {
        for backend in ["rust-sqlx", "rust-tokio-postgres"].iter() {
            assert!(validate_structural("anything", backend).is_empty());
        }
    }

    #[test]
    fn test_python_valid() {
        assert_passes(
            r#"from dataclasses import dataclass

@dataclass
class ListUsersRow:
    id: int
    name: str

async def list_users(conn) -> list[ListUsersRow]:
    pass
"#,
            "python-psycopg3",
        );
    }

    // Pre-3.10 typing constructs must each produce their own message.
    #[test]
    fn test_python_invalid_typing() {
        let source = r#"from typing import Optional

@dataclass
class Row:
    id: int

def query() -> List[Row]:
    pass
"#;
        let reported = validate_structural(source, "python-asyncpg");
        for needle in ["Optional", "List["].iter() {
            assert!(reported.iter().any(|msg| msg.contains(*needle)));
        }
    }

    #[test]
    fn test_typescript_valid() {
        assert_passes(
            r#"export interface ListUsersRow {
  id: number;
  name: string;
}

export async function listUsers(): Promise<ListUsersRow[]> {
  // ...
}
"#,
            "typescript-postgres",
        );
    }

    #[test]
    fn test_go_valid() {
        let source = "package db\n\nimport (\n\t\"context\"\n\t\"encoding/json\"\n)\n\ntype ListUsersRow struct {\n\tID   int    `json:\"id\"`\n\tName string `json:\"name\"`\n}\n\nfunc ListUsers(ctx context.Context) ([]ListUsersRow, error) {\n\treturn nil, nil\n}\n";
        assert_passes(source, "go-pgx");
    }

    #[test]
    fn test_php_valid() {
        assert_passes(
            r#"<?php

declare(strict_types=1);

readonly class ListUsersRow {
    public function __construct(
        public int $id,
        public string $name,
    ) {}
}

function listUsers($pdo): array {
    return [];
}
"#,
            "php-pdo",
        );
    }
}