hamelin_legacy 0.4.4

Legacy AST translation code for Hamelin (to be deprecated)
Documentation
use hamelin_legacy::Compiler;
use hamelin_lib::catalog::{CatalogBuilder, CatalogProvider, DataSetBuilder};
use hamelin_lib::types::{INT, STRING};
use rstest::rstest;
use std::sync::Arc;

use crate::shared;

// Success cases: every #[case] supplies a Hamelin query (first literal)
// followed by the SQL expected for it (second literal).
#[rstest]
// No source expression and no AS keyword: the expected SQL reads
// event.original and keeps only rows where regexp_count is above zero.
#[case(
    r#"
        LET event.original = 'something'
        | PARSE 'x * y' e1
    "#,
    r#"
        SELECT
          regexp_extract("event"."original", '(?s)x (.*?) y', 1) AS "e1",
          "event"
        FROM
          (
            SELECT
              CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event"
          )
        WHERE
            regexp_count("event"."original", '(?s)x (.*?) y') > 0
    "#
)]
// Explicit source column t1 with two wildcards bound to e1 and e2
// (capture groups 1 and 2 in the expected SQL).
#[case(
    r#"
        FROM table | PARSE t1 'x * y * z' AS e1, e2 | SELECT e1, e2
    "#,
    r#"
        SELECT
            "e1"
            , "e2"
        FROM (
            SELECT
                regexp_extract("t1", '(?s)x (.*?) y (.*?) z', 1) AS "e1"
                , regexp_extract("t1", '(?s)x (.*?) y (.*?) z', 2) AS "e2"
                , "t1"
                , "t2"
            FROM
                "table"
            WHERE
                regexp_count("t1", '(?s)x (.*?) y (.*?) z') > 0
        )
    "#
)]
// Parsed columns are never selected downstream: the expected SQL has no
// regexp_extract projections but still carries the regexp_count filter.
#[case(
    r#"
        FROM table
        | LET event.original = 'something'
        | PARSE 'x * y * z' AS e1, e2
        | SELECT t1
    "#,
    r#"SELECT "t1"
       FROM
       (
        SELECT
             CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event",
             "t1",
             "t2"
         FROM
            "table"
       )
       WHERE regexp_count( "event"."original", '(?s)x (.*?) y (.*?) z' ) > 0
    "#
)]
// DROP t2 ahead of PARSE: t2 is absent from every projection in the
// expected SQL, while event and t1 are carried through.
#[case(
    r#"
        FROM table
        | LET event.original = 'something'
        | DROP t2
        | PARSE 'x * y * z' AS e1, e2
        | SELECT e1, e2
    "#,
    r#"
        SELECT  "e1", "e2"
        FROM
        (
            SELECT  regexp_extract( "event"."original", '(?s)x (.*?) y (.*?) z', 1 ) AS "e1"
                    , regexp_extract( "event"."original", '(?s)x (.*?) y (.*?) z', 2 ) AS "e2"
                    , "event"
                    , "t1"
            FROM
            (
                SELECT
                    CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event",
                    "t1"
                FROM
                    "table"
            )
            WHERE regexp_count( "event"."original", '(?s)x (.*?) y (.*?) z' ) > 0
        )
    "#
)]
// Second capture targets the nested path schnitzel.mit.pommes after
// schnitzel.mit was set to 1: the expected SQL rebuilds schnitzel as a
// nested ROW holding capture group 2.
#[case(
    r#"
        FROM table
        | LET event.original = 'something'
        | LET schnitzel.mit = 1
        | PARSE 'x * y * z' AS e1, schnitzel.mit.pommes
        | SELECT schnitzel
    "#,
    r#"
        SELECT
            "schnitzel"
        FROM (
            SELECT
                regexp_extract("event"."original", '(?s)x (.*?) y (.*?) z', 1) AS "e1"
                , CAST(ROW(ROW(regexp_extract("event"."original", '(?s)x (.*?) y (.*?) z', 2))) AS ROW("mit" ROW("pommes" VARCHAR))) AS "schnitzel"
                , "event"
                , "t1"
                , "t2"
            FROM
               (
                SELECT
                     CAST(ROW(1) AS ROW("mit" BIGINT)) AS "schnitzel",
                     CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event",
                     "t1",
                     "t2"
                 FROM
                    "table"
               )
            WHERE
                regexp_count("event"."original", '(?s)x (.*?) y (.*?) z') > 0
        )
    "#
)]
// NODROP with a parsed column selected: the extractions are present but
// the expected SQL has no regexp_count filter.
#[case(
    r#"
        FROM table
        | LET event.original = 'something'
        | PARSE 'x * y * z' AS e1, e2 NODROP
        | SELECT e1
    "#,
    r#"
        SELECT
            "e1"
        FROM
            (
            SELECT regexp_extract( "event"."original", '(?s)x (.*?) y (.*?) z', 1 ) AS "e1"
                , regexp_extract( "event"."original", '(?s)x (.*?) y (.*?) z', 2 ) AS "e2"
                , "event"
                , "t1"
                , "t2"
             FROM
               (
                SELECT
                     CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event",
                     "t1",
                     "t2"
                 FROM
                    "table"
               )
            )
    "#
)]
// NODROP with no parsed column selected: the expected SQL contains
// neither the extractions nor a filter.
#[case(
    r#"
        FROM table
        | LET event.original = 'something'
        | PARSE 'x * y * z' AS e1, e2 NODROP
        | SELECT t1
    "#,
    r#"
       SELECT
            "t1"
       FROM
       (
        SELECT
             CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event",
             "t1",
             "t2"
         FROM
            "table"
       )
    "#
)]
// Pattern *-** with two targets: the expected regex escapes the hyphen and
// ends in one greedy (.*) group, giving two capture groups in total.
#[case(
    r#"
        LET event.original = 'something'
        | PARSE  '*-**' AS e_head, e_tail
    "#,
    r#"
        SELECT
          regexp_extract("event"."original", '(?s)(.*?)\-(.*)', 1 ) AS "e_head"
          ,regexp_extract("event"."original", '(?s)(.*?)\-(.*)', 2 ) AS "e_tail"
          ,"event"
        FROM
          (
            SELECT
              CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event"
          )
        WHERE
            regexp_count("event"."original", '(?s)(.*?)\-(.*)') > 0
    "#
)]
// Underscore targets: five wildcards, but only groups 1 and 5 become output
// columns (uuid_first and uuid_last) in the expected SQL.
#[case(
    r#"
        LET event.original = 'something'
        | PARSE '*-*-*-*-*' AS uuid_first, _, _, _, uuid_last
    "#,
    r#"
        SELECT
          regexp_extract("event"."original", '(?s)(.*?)\-(.*?)\-(.*?)\-(.*?)\-(.*)', 1 ) AS "uuid_first"
          ,regexp_extract("event"."original", '(?s)(.*?)\-(.*?)\-(.*?)\-(.*?)\-(.*)', 5 ) AS "uuid_last"
          ,"event"
        FROM
          (
            SELECT
              CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event"
          )
        WHERE
            regexp_count("event"."original", '(?s)(.*?)\-(.*?)\-(.*?)\-(.*?)\-(.*)') > 0
    "#
)]
// Literal key=value anchors with an underscore target in the middle: only
// groups 1 and 3 are projected (app and user).
#[case(
    r#"
        LET event.original = 'something'
        | PARSE 'app=* * user=*' AS app, _, user
    "#,
    r#"
        SELECT
          regexp_extract("event"."original", '(?s)app=(.*?) (.*?) user=(.*)', 1 ) AS "app"
          ,regexp_extract("event"."original", '(?s)app=(.*?) (.*?) user=(.*)', 3 ) AS "user"
          ,"event"
        FROM
          (
            SELECT
              CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event"
          )
        WHERE
            regexp_count("event"."original", '(?s)app=(.*?) (.*?) user=(.*)') > 0
    "#
)]
/// Runs one PARSE translation case.
///
/// Builds a catalog containing a single data set named `table` with columns
/// `t1` (`STRING`) and `t2` (`INT`), installs it on a fresh `Compiler` as the
/// environment provider, and delegates to `shared::compare` with the
/// `hamelin` query and the `expected` text. The result of `shared::compare`
/// is returned unchanged.
///
/// # Panics
///
/// Panics if the catalog cannot be converted into a `CatalogProvider`.
pub fn test_parse(#[case] hamelin: String, #[case] expected: String) -> anyhow::Result<()> {
    // The only data set the queries above can reference via `FROM table`.
    let table = DataSetBuilder::new("table".into())
        .with("t1", STRING)
        .with("t2", INT);
    let catalog = CatalogBuilder::default().with_data_set(table).build();

    let mut translator = Compiler::new();
    translator.set_environment_provider(Arc::new(CatalogProvider::try_from(catalog).unwrap()));

    shared::compare(Arc::new(translator), hamelin, expected)
}

// Failure cases: every #[case] supplies a Hamelin query, the expected primary
// error message, and optionally the expected rendered source description.
#[rstest]
// PARSE reads from t3, which is not bound: the expected source description
// lists an environment containing only event.
#[case::unbound_column(
    r#"
        LET event.original = 'something'
        | PARSE t3 'x * y' AS e1
    "#,
    "error while translating",
    Some("unbound column reference: t3\nin environment {\n    event: {original: string}\n}\n")
)]
// The pattern contains no wildcard at all.
#[case::no_anchors(
    r#"
        LET event.original = 'something'
        | PARSE 'x y' AS e1
    "#,
    "anchor pattern must contain at least one *",
    None
)]
// One wildcard but two target columns.
#[case::too_many_columns(
    r#"
        LET event.original = 'something'
        | PARSE 'x * y' AS e1, e2
    "#,
    "anchor pattern must contain a * for 2 new columns",
    None
)]
// Two wildcards but a single target column.
#[case::not_enough_columns(
    r#"
        LET event.original = 'something'
        | PARSE 'x * y * z' AS e1
    "#,
    "anchor pattern must contain a * for 1 new columns",
    None
)]
/// Runs one PARSE failure case.
///
/// Compiles `hamelin` with a freshly constructed `Compiler` and inspects the
/// first reported error: its primary message must equal `expected_message`.
/// When `expected_source` is `Some`, the rendered `source_desc` of that error
/// must equal it as well; when it is `None`, `source_desc` is not inspected.
///
/// # Panics
///
/// Panics with "Expected error" if compilation succeeds, and on any failed
/// assertion.
pub fn test_parse_errors(
    #[case] hamelin: String,
    #[case] expected_message: String,
    #[case] expected_source: Option<&str>,
) {
    let compiler = Compiler::new();
    let failure = match compiler.compile_query(hamelin) {
        Err(failure) => failure,
        Ok(_) => panic!("Expected error"),
    };

    // Only the first reported error is checked.
    let first = &failure.errors[0].error;
    assert_eq!(first.primary.message, expected_message);

    if let Some(wanted) = expected_source {
        let rendered = first.source_desc.clone().unwrap().to_string();
        assert_eq!(rendered, wanted);
    }
}