Skip to main content

scythe_codegen/
validation.rs

1use std::process::Command;
2
3/// Validate generated code structurally for a given backend.
4/// Returns a list of errors (empty = passed).
5pub fn validate_structural(code: &str, backend_name: &str) -> Vec<String> {
6    match backend_name {
7        "python-psycopg3" | "python-asyncpg" => validate_python(code),
8        "typescript-postgres" | "typescript-pg" => validate_typescript(code),
9        "go-pgx" => validate_go(code),
10        "java-jdbc" => validate_java(code),
11        "kotlin-jdbc" => validate_kotlin(code),
12        "csharp-npgsql" => validate_csharp(code),
13        "elixir-postgrex" => validate_elixir(code),
14        "ruby-pg" => validate_ruby(code),
15        "php-pdo" => validate_php(code),
16        // Rust backends are validated by syn, not here.
17        "rust-sqlx" | "rust-tokio-postgres" => vec![],
18        _ => vec![format!("unknown backend: {}", backend_name)],
19    }
20}
21
/// Structural checks for generated Python.
///
/// Flags the `__future__` annotations import, output with no function
/// definition (and additionally output with neither a class nor a function),
/// pre-3.10 typing spellings (`Union`, `Optional`, `List[`, `Dict[`), and the
/// first line that starts with a tab. Returns one message per failed check.
fn validate_python(code: &str) -> Vec<String> {
    let mut problems: Vec<String> = Vec::new();

    if code.contains("from __future__ import annotations") {
        problems.push(
            "unnecessary `from __future__ import annotations` — target is Python 3.10+".into(),
        );
    }

    let defines_type = code.contains("@dataclass") || code.contains("class ");
    let defines_function = code.contains("async def ") || code.contains("def ");
    if !defines_function {
        // With neither a type nor a function there is nothing useful in the output.
        if !defines_type {
            problems
                .push("missing `@dataclass`/`class` and `def ` -- no meaningful output".into());
        }
        problems.push("missing `async def ` or `def ` (for query functions)".into());
    }

    // Pre-3.10 typing spellings, reported in this fixed order.
    let legacy_typing = [
        (
            "from typing import Union",
            "contains `from typing import Union` (pre-3.10 style)",
        ),
        (
            "from typing import Optional",
            "contains `from typing import Optional` (pre-3.10 style)",
        ),
        ("List[", "contains `List[` (use lowercase `list[`)"),
        ("Dict[", "contains `Dict[` (use lowercase `dict[`)"),
    ];
    for entry in legacy_typing.iter() {
        if code.contains(entry.0) {
            problems.push(entry.1.to_string());
        }
    }

    // Only the first tab-indented line is reported.
    if let Some(index) = code.lines().position(|line| line.starts_with('\t')) {
        problems.push(format!(
            "line {} uses tab indentation (should use 4 spaces)",
            index + 1
        ));
    }

    problems
}
73
/// Structural checks for generated TypeScript.
///
/// Requires an exported function, requires an exported interface/type only
/// when no exported function is present, and reports the first line that uses
/// the `any` type. Returns one message per failed check.
fn validate_typescript(code: &str) -> Vec<String> {
    /// Characters that can be part of a TypeScript identifier.
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_' || c == '$'
    }

    /// True when `line` contains `any` as a standalone word in one of the
    /// positions `: any`, `<any>`, `any;`, `any,` or `any)`.
    ///
    /// The word-boundary test keeps identifiers that merely contain the
    /// letters (`company`, `many`, `anything`) from being reported.
    fn uses_any_type(line: &str) -> bool {
        line.match_indices("any").any(|(start, word)| {
            let before = &line[..start];
            let after = &line[start + word.len()..];
            let standalone = !before.chars().next_back().map_or(false, is_ident_char)
                && !after.chars().next().map_or(false, is_ident_char);
            standalone
                && (before.ends_with(": ")
                    || (before.ends_with('<') && after.starts_with('>'))
                    || after.starts_with(|c: char| matches!(c, ';' | ',' | ')')))
        })
    }

    let mut errors = Vec::new();

    let has_function = code.contains("export async function") || code.contains("export function");

    // Structs are only required when the code is NOT exec-only (i.e. when
    // there is something beyond a bare function).
    if !code.contains("export interface") && !code.contains("export type") && !has_function {
        errors.push("missing `export interface` or `export type` (for DTOs)".into());
    }

    if !has_function {
        errors.push("missing `export async function` or `export function`".into());
    }

    // Report only the first offending line; one error is enough.
    if let Some(offender) = code.lines().map(str::trim).find(|line| uses_any_type(line)) {
        errors.push(format!(
            "contains `any` type (should use `unknown` or specific): {}",
            offender
        ));
    }

    errors
}
109
/// Structural checks for generated Go.
///
/// Requires a `func`, requires a struct declaration only when no `func` is
/// present, requires a mention of `context.Context`, rejects non-blank lines
/// that start with four spaces, and requires `json:"` tags whenever a struct
/// is declared. Returns one message per failed check.
fn validate_go(code: &str) -> Vec<String> {
    let declares_func = code.contains("func ");
    let declares_struct = code.contains("type ") && code.contains("struct {");
    let mentions_context = code.contains("context.Context");
    let has_json_tags = code.contains("json:\"");

    // Go uses tabs for indentation: a non-blank line that opens with four
    // spaces counts as space indentation.
    let space_indented = code
        .lines()
        .any(|line| line.starts_with("    ") && !line.trim().is_empty());

    let mut problems: Vec<String> = Vec::new();
    {
        let mut report = |failed: bool, message: &str| {
            if failed {
                problems.push(message.to_string());
            }
        };

        // Exec-only queries produce just a function, so a struct is only
        // demanded when there is no function either.
        report(
            !declares_struct && !declares_func,
            "missing `type ... struct {` (for structs)",
        );
        report(!declares_func, "missing `func ` (for functions)");
        report(!mentions_context, "missing `context.Context` as first param");
        report(
            space_indented,
            "uses space indentation (Go standard is tabs)",
        );
        // json tags only required when struct is present
        report(
            declares_struct && !has_json_tags,
            "missing `json:\"` tags on struct fields",
        );
    }

    problems
}
150
/// Structural checks for generated Java.
///
/// Each table entry pairs "is the requirement met?" with the message to emit
/// when it is not; messages come out in table order.
fn validate_java(code: &str) -> Vec<String> {
    let has_static_method = code.contains("public static ");
    let has_record = code.contains("public record ");

    let requirements = [
        // Records are only required when a struct was generated; exec-only
        // queries produce just a method.
        (
            has_record || has_static_method,
            "missing `public record ` (for DTOs)",
        ),
        (
            has_static_method,
            "missing `public static ` (for query methods)",
        ),
        (
            code.contains("throws SQLException"),
            "missing `throws SQLException`",
        ),
        (
            code.contains("try ("),
            "missing `try (` (try-with-resources)",
        ),
    ];

    requirements
        .iter()
        .filter(|entry| !entry.0)
        .map(|entry| entry.1.to_string())
        .collect()
}
176
/// Structural checks for generated Kotlin.
///
/// Requires a `fun`, requires a `data class` only when no `fun` is present,
/// and requires a `.use {` block. Messages are emitted in that order.
fn validate_kotlin(code: &str) -> Vec<String> {
    let declares_fun = code.contains("fun ");

    let requirements = [
        // data class only required when a struct was generated
        (
            code.contains("data class ") || declares_fun,
            "missing `data class ` (for DTOs)",
        ),
        (declares_fun, "missing `fun ` (for functions)"),
        (
            code.contains(".use {"),
            "missing `.use {` (resource management)",
        ),
    ];

    let mut problems = Vec::new();
    for entry in requirements.iter() {
        if !entry.0 {
            problems.push(entry.1.to_string());
        }
    }
    problems
}
197
/// Structural checks for generated C#.
///
/// Requires an `async Task` method, requires a `public record` only when no
/// such method is present, and requires at least one `await `.
fn validate_csharp(code: &str) -> Vec<String> {
    let mut problems: Vec<String> = Vec::new();

    let has_async_method = code.contains("async Task<") || code.contains("async Task ");
    if !has_async_method {
        // Records are only required when a struct was generated
        if !code.contains("public record ") {
            problems.push(String::from("missing `public record ` (for DTOs)"));
        }
        problems.push(String::from(
            "missing `async Task<` or `async Task` (for async methods)",
        ));
    }

    if !code.contains("await ") {
        problems.push(String::from("missing `await `"));
    }

    problems
}
218
/// Structural checks for generated Elixir.
///
/// Requires a `def `/`defp ` function; `defmodule ` and `defstruct` are only
/// demanded when no function is present. Also requires a `@type ` or `@spec `
/// typespec.
fn validate_elixir(code: &str) -> Vec<String> {
    let defines_function = code.contains("def ") || code.contains("defp ");
    let has_typespec = code.contains("@type ") || code.contains("@spec ");

    let mut problems: Vec<String> = Vec::new();

    if !defines_function {
        // defmodule/defstruct are only required when a struct was generated;
        // exec-only queries produce just a function.
        if !code.contains("defmodule ") {
            problems.push("missing `defmodule ` (for modules)".to_string());
        }
        if !code.contains("defstruct") {
            problems.push("missing `defstruct` (for structs)".to_string());
        }
        problems.push("missing `def ` or `defp ` (for functions)".to_string());
    }

    if !has_typespec {
        problems.push("missing `@type ` or `@spec ` (for typespecs)".to_string());
    }

    problems
}
245
/// Structural checks for generated Ruby.
///
/// Requires a `def self.` module method, requires `Data.define` only when no
/// such method is present, and requires the frozen-string-literal magic
/// comment.
fn validate_ruby(code: &str) -> Vec<String> {
    let has_module_method = code.contains("def self.");
    let has_data_define = code.contains("Data.define");
    let has_frozen_comment = code.contains("# frozen_string_literal: true");

    let mut problems: Vec<String> = Vec::new();
    {
        let mut require = |satisfied: bool, message: &str| {
            if !satisfied {
                problems.push(message.to_owned());
            }
        };

        // Data.define only required when a struct was generated
        require(
            has_data_define || has_module_method,
            "missing `Data.define` (for DTOs)",
        );
        require(
            has_module_method,
            "missing `def self.` (for module methods)",
        );
        require(
            has_frozen_comment,
            "missing `# frozen_string_literal: true`",
        );
    }

    problems
}
266
/// Structural checks for generated PHP.
///
/// Requires a `function `, requires a `readonly class ` only when no function
/// is present, and requires both `declare(strict_types=1)` and the `<?php`
/// opening tag. Messages are emitted in table order.
fn validate_php(code: &str) -> Vec<String> {
    let declares_function = code.contains("function ");

    let requirements = [
        // readonly class only required when a struct was generated
        (
            code.contains("readonly class ") || declares_function,
            "missing `readonly class ` (for DTOs)",
        ),
        (
            declares_function,
            "missing `function ` (for query functions)",
        ),
        (
            code.contains("declare(strict_types=1)"),
            "missing `declare(strict_types=1)`",
        ),
        (code.contains("<?php"), "missing `<?php`"),
    ];

    requirements
        .iter()
        .filter(|entry| !entry.0)
        .map(|entry| String::from(entry.1))
        .collect()
}
291
292/// Validate generated code using real language tools (if available).
293/// Returns None if the tool is not installed, Some(errors) otherwise.
294pub fn validate_with_tools(code: &str, backend_name: &str) -> Option<Vec<String>> {
295    match backend_name {
296        name if name.starts_with("python") => validate_python_tools(code),
297        name if name.starts_with("typescript") => validate_typescript_tools(code),
298        name if name.starts_with("go") => validate_go_tools(code),
299        name if name.starts_with("ruby") => validate_ruby_tools(code),
300        name if name.starts_with("php") => validate_php_tools(code),
301        name if name.starts_with("kotlin") => validate_kotlin_tools(code),
302        _ => None,
303    }
304}
305
/// Write `code` to a freshly named file in the system temp directory.
///
/// The file name combines the current process id with a per-process counter,
/// so neither repeated calls in one process nor concurrent processes sharing
/// the temp directory reuse a name. Trailing whitespace in `code` is replaced
/// by exactly one newline.
///
/// Returns the path of the written file, or `None` if the write failed. The
/// caller is responsible for deleting the file.
fn write_temp(code: &str, ext: &str) -> Option<std::path::PathBuf> {
    use std::sync::atomic::{AtomicU64, Ordering};
    static COUNTER: AtomicU64 = AtomicU64::new(0);
    let n = COUNTER.fetch_add(1, Ordering::Relaxed);
    // The counter alone restarts at 0 in every process; the pid keeps parallel
    // processes from overwriting or deleting each other's files.
    let pid = std::process::id();
    // Kotlin files get a PascalCase name with no underscores (presumably to
    // satisfy ktlint's file-naming rule -- confirm). The `N` separator keeps
    // pid/counter digit runs from colliding.
    let basename = if ext == ".kt" {
        format!("ScytheValidate{pid}N{n}")
    } else {
        format!("scythe_validate_{pid}_{n}")
    };
    let path = std::env::temp_dir().join(format!("{basename}{ext}"));
    // Trim trailing whitespace/newlines to avoid tool complaints about blank lines at EOF
    let trimmed = format!("{}\n", code.trim_end());
    std::fs::write(&path, trimmed).ok()?;
    Some(path)
}
321
322fn validate_python_tools(code: &str) -> Option<Vec<String>> {
323    if Command::new("python3").arg("--version").output().is_err() {
324        return None;
325    }
326    let path = write_temp(code, ".py")?;
327    let mut errors = vec![];
328
329    // ast.parse — syntax check
330    let out = Command::new("python3")
331        .args([
332            "-c",
333            &format!("import ast; ast.parse(open({:?}).read())", path),
334        ])
335        .output()
336        .ok()?;
337    if !out.status.success() {
338        errors.push(format!(
339            "python syntax: {}",
340            String::from_utf8_lossy(&out.stderr)
341                .lines()
342                .next()
343                .unwrap_or("")
344        ));
345    }
346
347    // ruff check
348    if Command::new("ruff").arg("--version").output().is_ok() {
349        let out = Command::new("ruff")
350            .args([
351                "check",
352                "--select",
353                "E,F,I",
354                "--target-version",
355                "py310",
356                path.to_str()?,
357            ])
358            .output()
359            .ok()?;
360        if !out.status.success() {
361            for line in String::from_utf8_lossy(&out.stdout).lines().take(3) {
362                if !line.trim().is_empty() {
363                    errors.push(format!("ruff: {line}"));
364                }
365            }
366        }
367    }
368
369    let _ = std::fs::remove_file(&path);
370    Some(errors)
371}
372
373fn validate_typescript_tools(code: &str) -> Option<Vec<String>> {
374    if Command::new("biome").arg("--version").output().is_err() {
375        return None;
376    }
377    let path = write_temp(code, ".ts")?;
378    let mut errors = vec![];
379
380    let out = Command::new("biome")
381        .args(["check", "--no-errors-on-unmatched", path.to_str()?])
382        .output()
383        .ok()?;
384    if !out.status.success() {
385        for line in String::from_utf8_lossy(&out.stderr).lines().take(3) {
386            if !line.trim().is_empty() {
387                errors.push(format!("biome: {line}"));
388            }
389        }
390    }
391
392    let _ = std::fs::remove_file(&path);
393    Some(errors)
394}
395
396fn validate_go_tools(code: &str) -> Option<Vec<String>> {
397    if Command::new("gofmt").arg("-h").output().is_err() {
398        return None;
399    }
400    let path = write_temp(code, ".go")?;
401    let mut errors = vec![];
402
403    let out = Command::new("gofmt")
404        .args(["-e", path.to_str()?])
405        .output()
406        .ok()?;
407    if !out.status.success() {
408        for line in String::from_utf8_lossy(&out.stderr).lines().take(3) {
409            if !line.trim().is_empty() {
410                errors.push(format!("gofmt: {line}"));
411            }
412        }
413    }
414
415    let _ = std::fs::remove_file(&path);
416    Some(errors)
417}
418
419fn validate_ruby_tools(code: &str) -> Option<Vec<String>> {
420    if Command::new("ruby").arg("--version").output().is_err() {
421        return None;
422    }
423    let path = write_temp(code, ".rb")?;
424    let mut errors = vec![];
425
426    let out = Command::new("ruby")
427        .args(["-c", path.to_str()?])
428        .output()
429        .ok()?;
430    if !out.status.success() {
431        errors.push(format!(
432            "ruby syntax: {}",
433            String::from_utf8_lossy(&out.stderr)
434                .lines()
435                .next()
436                .unwrap_or("")
437        ));
438    }
439
440    let _ = std::fs::remove_file(&path);
441    Some(errors)
442}
443
444fn validate_php_tools(code: &str) -> Option<Vec<String>> {
445    if Command::new("php").arg("--version").output().is_err() {
446        return None;
447    }
448    let path = write_temp(code, ".php")?;
449    let mut errors = vec![];
450
451    let out = Command::new("php")
452        .args(["-l", path.to_str()?])
453        .output()
454        .ok()?;
455    if !out.status.success() {
456        errors.push(format!(
457            "php syntax: {}",
458            String::from_utf8_lossy(&out.stdout)
459                .lines()
460                .next()
461                .unwrap_or("")
462        ));
463    }
464
465    let _ = std::fs::remove_file(&path);
466    Some(errors)
467}
468
469fn validate_kotlin_tools(code: &str) -> Option<Vec<String>> {
470    if Command::new("ktlint").arg("--version").output().is_err() {
471        return None;
472    }
473    let path = write_temp(code, ".kt")?;
474    let mut errors = vec![];
475
476    let out = Command::new("ktlint")
477        .args(["--log-level=error", path.to_str()?])
478        .output()
479        .ok()?;
480    if !out.status.success() {
481        for line in String::from_utf8_lossy(&out.stdout).lines().take(3) {
482            if !line.trim().is_empty() {
483                errors.push(format!("ktlint: {line}"));
484            }
485        }
486    }
487
488    let _ = std::fs::remove_file(&path);
489    Some(errors)
490}
491
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that the structural validator for `backend` accepts `code`.
    fn assert_passes(code: &str, backend: &str) {
        let errors = validate_structural(code, backend);
        assert!(errors.is_empty(), "expected no errors, got: {:?}", errors);
    }

    #[test]
    fn test_unknown_backend() {
        let reported = validate_structural("some code", "unknown-backend");
        assert_eq!(reported.len(), 1);
        assert!(reported[0].contains("unknown backend"));
    }

    #[test]
    fn test_rust_backends_skip() {
        for backend in ["rust-sqlx", "rust-tokio-postgres"].iter() {
            assert!(validate_structural("anything", backend).is_empty());
        }
    }

    #[test]
    fn test_python_valid() {
        let source = concat!(
            "from dataclasses import dataclass\n",
            "\n",
            "@dataclass\n",
            "class ListUsersRow:\n",
            "    id: int\n",
            "    name: str\n",
            "\n",
            "async def list_users(conn) -> list[ListUsersRow]:\n",
            "    pass\n",
        );
        assert_passes(source, "python-psycopg3");
    }

    #[test]
    fn test_python_invalid_typing() {
        let source = concat!(
            "from typing import Optional\n",
            "\n",
            "@dataclass\n",
            "class Row:\n",
            "    id: int\n",
            "\n",
            "def query() -> List[Row]:\n",
            "    pass\n",
        );
        let reported = validate_structural(source, "python-asyncpg");
        for needle in ["Optional", "List["].iter() {
            assert!(reported.iter().any(|message| message.contains(needle)));
        }
    }

    #[test]
    fn test_typescript_valid() {
        let source = concat!(
            "export interface ListUsersRow {\n",
            "  id: number;\n",
            "  name: string;\n",
            "}\n",
            "\n",
            "export async function listUsers(): Promise<ListUsersRow[]> {\n",
            "  // ...\n",
            "}\n",
        );
        assert_passes(source, "typescript-postgres");
    }

    #[test]
    fn test_go_valid() {
        // Go fixtures are tab-indented; the tabs are written as escapes.
        let source = concat!(
            "package db\n",
            "\n",
            "import (\n",
            "\t\"context\"\n",
            "\t\"encoding/json\"\n",
            ")\n",
            "\n",
            "type ListUsersRow struct {\n",
            "\tID   int    `json:\"id\"`\n",
            "\tName string `json:\"name\"`\n",
            "}\n",
            "\n",
            "func ListUsers(ctx context.Context) ([]ListUsersRow, error) {\n",
            "\treturn nil, nil\n",
            "}\n",
        );
        assert_passes(source, "go-pgx");
    }

    #[test]
    fn test_php_valid() {
        let source = concat!(
            "<?php\n",
            "\n",
            "declare(strict_types=1);\n",
            "\n",
            "readonly class ListUsersRow {\n",
            "    public function __construct(\n",
            "        public int $id,\n",
            "        public string $name,\n",
            "    ) {}\n",
            "}\n",
            "\n",
            "function listUsers($pdo): array {\n",
            "    return [];\n",
            "}\n",
        );
        assert_passes(source, "php-pdo");
    }
}