polyglot-sql 0.3.3

SQL parsing, validating, formatting, and dialect translation library
Documentation
/// Regression tests for Snowflake SQL parsing issues.
///
/// These tests cover SQL patterns generated by the Snowflake Python Connector
/// (snowflake-connector-python) that previously failed to parse.
///
/// Related: https://github.com/tobilg/polyglot/issues/164
use polyglot_sql::dialects::DialectType;
use polyglot_sql::{parse_one, transpile};

fn parse_and_generate(sql: &str) -> String {
    let result = transpile(sql, DialectType::Snowflake, DialectType::Snowflake).unwrap();
    result.join(";\n")
}

// =====================================================================
// Category 1: TIMESTAMP_TZ(N) precision casts
// =====================================================================

#[test]
fn test_snowflake_timestamp_tz_cast_precision() {
    let sql = r#"SELECT '1987-01-30 23:59:59.123456789'::TIMESTAMP_TZ(9)"#;
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "TIMESTAMP_TZ(9) cast failed: {:?}",
        result.err()
    );
}

#[test]
fn test_snowflake_timestamp_ntz_cast_precision() {
    let sql = r#"SELECT '1987-01-30 23:59:59.123456789'::TIMESTAMP_NTZ(9)"#;
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "TIMESTAMP_NTZ(9) cast failed: {:?}",
        result.err()
    );
}

#[test]
fn test_snowflake_timestamp_ltz_cast_precision() {
    let sql = r#"SELECT '1987-01-30 23:59:59.123456789'::TIMESTAMP_LTZ(9)"#;
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "TIMESTAMP_LTZ(9) cast failed: {:?}",
        result.err()
    );
}

#[test]
fn test_snowflake_timestamptz_no_underscore_precision() {
    let sql = r#"SELECT '2024-01-01'::TIMESTAMPTZ(6)"#;
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "TIMESTAMPTZ(6) cast failed: {:?}",
        result.err()
    );
}

#[test]
fn test_snowflake_timestamp_tz_in_insert() {
    let sql = r#"INSERT INTO t (id, dt)
        SELECT seq4() + 0, '1987-01-30 23:59:59.123456789'::TIMESTAMP_TZ(9)
        FROM TABLE(GENERATOR(ROWCOUNT => 10000))"#;
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "TIMESTAMP_TZ(9) in INSERT failed: {:?}",
        result.err()
    );
}

// Roundtrip: verify the generated SQL re-parses correctly
#[test]
fn test_snowflake_timestamp_cast_roundtrip() {
    let sql = r#"SELECT '1987-01-30 23:59:59.123456789'::TIMESTAMP_TZ(9)"#;
    let gen = parse_and_generate(sql);
    assert_eq!(
        gen,
        "SELECT CAST('1987-01-30 23:59:59.123456789' AS TIMESTAMPTZ(9))"
    );

    let sql2 = r#"SELECT '1987-01-30 23:59:59.123456789'::TIMESTAMP_NTZ(9)"#;
    let gen2 = parse_and_generate(sql2);
    assert_eq!(
        gen2,
        "SELECT CAST('1987-01-30 23:59:59.123456789' AS TIMESTAMPNTZ(9))"
    );

    let sql3 = r#"SELECT '1987-01-30 23:59:59.123456789'::TIMESTAMPLTZ(9)"#;
    let gen3 = parse_and_generate(sql3);
    assert_eq!(
        gen3,
        "SELECT CAST('1987-01-30 23:59:59.123456789' AS TIMESTAMPLTZ(9))"
    );
}

// =====================================================================
// Category 2: PUT file upload commands
// =====================================================================

#[test]
fn test_snowflake_put_file_to_stage() {
    let sql = "PUT 'file:///tmp/file0.txt' @my_stage parallel=4 source_compression=auto_detect";
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "PUT with quoted file URI failed: {:?}",
        result.err()
    );
}

#[test]
fn test_snowflake_put_file_with_placeholder() {
    let sql = "PUT file:///tmp/data.csv ?";
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "PUT with ? placeholder failed: {:?}",
        result.err()
    );
}

#[test]
fn test_snowflake_put_quoted_stage() {
    let sql = "PUT 'file:///tmp/data.csv' '@SYSTEM$BIND/1234/0/data_0_0_0.snappy.parquet' OVERWRITE=TRUE AUTO_COMPRESS=FALSE SOURCE_COMPRESSION=NONE";
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "PUT with quoted stage failed: {:?}",
        result.err()
    );
}

// Minimal PUT with just source and ? target
#[test]
fn test_snowflake_put_minimal_placeholder() {
    let sql = "PUT 'file:///tmp/f.csv' ?";
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "PUT minimal with ? failed: {:?}",
        result.err()
    );
}

// PUT roundtrip
#[test]
fn test_snowflake_put_roundtrip() {
    let sql = "PUT 'file:///tmp/data.csv' @my_stage";
    let gen = parse_and_generate(sql);
    // Verify the generated SQL re-parses
    let result = parse_one(&gen, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "PUT roundtrip failed to re-parse: {:?}",
        result.err()
    );
}

// =====================================================================
// Category 3: COPY INTO with bind params
// =====================================================================

#[test]
fn test_snowflake_copy_placeholder_target() {
    let sql = r#"COPY INTO "DB"."SCHEMA"."TABLE" FROM ? FILE_FORMAT = (TYPE = CSV)"#;
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "COPY INTO with ? source failed: {:?}",
        result.err()
    );
}

#[test]
fn test_snowflake_copy_into_with_on_error_placeholder() {
    let sql = r#"COPY INTO identifier(?) (name,points)
        FROM (SELECT $1:name::TEXT,$1:points::NUMBER(38, 0)
        FROM '@SNOWPARK_TEMP_STAGE_ABC123')
        FILE_FORMAT=(TYPE=PARQUET USE_VECTORIZED_SCANNER=False
        COMPRESSION=auto BINARY_AS_TEXT=FALSE) PURGE=TRUE ON_ERROR=?"#;
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(result.is_ok(), "COPY ON_ERROR=? failed: {:?}", result.err());
}

#[test]
fn test_snowflake_copy_into_with_use_logical_type() {
    let sql = r#"COPY INTO identifier(?) ("DT")
        FROM (SELECT $1:"DT" FROM '@"STAGE_ABC"')
        FILE_FORMAT=(TYPE=PARQUET USE_VECTORIZED_SCANNER=False
        COMPRESSION=auto USE_LOGICAL_TYPE = TRUE) PURGE=TRUE ON_ERROR=?"#;
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "USE_LOGICAL_TYPE failed: {:?}",
        result.err()
    );
}

// Reversed parameter order: ON_ERROR before PURGE
#[test]
fn test_snowflake_copy_reversed_param_order() {
    let sql = r#"COPY INTO identifier(?) ("A")
        FROM (SELECT $1:"A" FROM '@"STG"')
        FILE_FORMAT=(TYPE=PARQUET COMPRESSION=auto) ON_ERROR=? PURGE=TRUE"#;
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "COPY reversed param order failed: {:?}",
        result.err()
    );
}

// Minimal COPY with ? source
#[test]
fn test_snowflake_copy_minimal_placeholder() {
    let sql = r#"COPY INTO "T" FROM ?"#;
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "COPY minimal with ? source failed: {:?}",
        result.err()
    );
}

// COPY roundtrip
#[test]
fn test_snowflake_copy_roundtrip() {
    let sql = r#"COPY INTO "DB"."SCHEMA"."TABLE" FROM ? FILE_FORMAT = (TYPE = CSV)"#;
    let gen = parse_and_generate(sql);
    let result = parse_one(&gen, DialectType::Snowflake);
    assert!(
        result.is_ok(),
        "COPY roundtrip failed to re-parse: {:?}",
        result.err()
    );
}

// =====================================================================
// Category 5: INSERT OVERWRITE ALL
// =====================================================================

#[test]
fn test_snowflake_insert_overwrite_all() {
    let sql = "INSERT OVERWRITE ALL INTO x SELECT 1";
    let gen = parse_and_generate(sql);
    assert_eq!(gen, "INSERT OVERWRITE ALL INTO x SELECT 1");
}

// =====================================================================
// Exact SQL from issue #164 — verifies the issue is resolved
// =====================================================================

#[test]
fn issue_164_cat1_placeholder_bind_params() {
    let sql = r#"COPY INTO identifier(?) /* Python:snowflake.connector.pandas_tools.write_pandas() */ ("name","points") FROM (SELECT $1:"name"::TEXT,$1:"points"::NUMBER(38, 0) FROM '@"SNOWPARK_TEMP_STAGE_A49SXIO2Z6"') FILE_FORMAT=(TYPE=PARQUET USE_VECTORIZED_SCANNER=False COMPRESSION=auto BINARY_AS_TEXT=FALSE) PURGE=TRUE ON_ERROR=?"#;
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(result.is_ok(), "Issue #164 Cat1 failed: {:?}", result.err());
}

#[test]
fn issue_164_cat2_put_placeholder_stage() {
    let sql =
        "PUT 'file:///var/folders/temp/file0.txt' ? parallel=4 source_compression=auto_detect";
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(result.is_ok(), "Issue #164 Cat2 failed: {:?}", result.err());
}

#[test]
fn issue_164_cat3_timestamp_precision() {
    let sql = "INSERT INTO test_timestamp_mixed_batches (id, dt) SELECT seq4() + 0, '1987-01-30 23:59:59.123456789'::TIMESTAMP_NTZ(9) FROM TABLE(GENERATOR(ROWCOUNT => 10000))";
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(result.is_ok(), "Issue #164 Cat3 failed: {:?}", result.err());
}

#[test]
fn issue_164_cat4_insert_overwrite_all() {
    let sql = "insert overwrite all when c1 > 20 then into test_tgt1 values (c1) when c1 > 10 then into test_tgt1 values (c1) into test_tgt2 values (c1) else into test_tgt2 values (c1) select c1 from test_src_multi";
    let result = parse_one(sql, DialectType::Snowflake);
    assert!(result.is_ok(), "Issue #164 Cat4 failed: {:?}", result.err());
}

/// Category 5: `name%` is a non-standard unquoted identifier generated by
/// the Snowflake Python connector's `write_pandas()`. This is arguably a
/// connector-side bug — the identifier should be quoted as `"name%"`.
/// Documenting as a known limitation; not addressed in this PR.
#[test]
fn issue_164_cat5_name_percent_known_limitation() {
    let sql = r#"CREATE TABLE IF NOT EXISTS identifier(?) (name% TEXT, points NUMBER(38, 0)) /* Python:snowflake.connector.pandas_tools.write_pandas() */"#;
    let result = parse_one(sql, DialectType::Snowflake);
    // Expected to fail — unquoted identifiers with % are not valid SQL.
    // The connector should quote these as "name%" instead.
    assert!(
        result.is_err(),
        "name% should fail to parse (known limitation)"
    );
}

// =====================================================================
// Compatibility: verify non-COPY/PUT Snowflake syntax still works
// =====================================================================

#[test]
fn test_snowflake_regular_select_unaffected() {
    let sql = r#"SELECT a, b FROM "DB"."SCHEMA"."TABLE" WHERE a > 1"#;
    let gen = parse_and_generate(sql);
    let result = parse_one(&gen, DialectType::Snowflake);
    assert!(result.is_ok(), "Regular SELECT broken: {:?}", result.err());
}

#[test]
fn test_snowflake_cte_unaffected() {
    let sql = "WITH cte AS (SELECT 1 AS x) SELECT x FROM cte";
    let gen = parse_and_generate(sql);
    let result = parse_one(&gen, DialectType::Snowflake);
    assert!(result.is_ok(), "CTE broken: {:?}", result.err());
}

#[test]
fn test_snowflake_create_table_unaffected() {
    let sql = r#"CREATE TABLE "DB"."S"."T" (id INT, name TEXT)"#;
    let gen = parse_and_generate(sql);
    let result = parse_one(&gen, DialectType::Snowflake);
    assert!(result.is_ok(), "CREATE TABLE broken: {:?}", result.err());
}