use hamelin_legacy::Compiler;
use hamelin_lib::catalog::{CatalogBuilder, CatalogProvider, DataSetBuilder};
use hamelin_lib::types::{INT, STRING};
use rstest::rstest;
use std::sync::Arc;
use crate::shared;
// Parametrized check that Hamelin `PARSE` pipelines compile to the expected SQL.
// Each case is a pair: (Hamelin source, expected SQL handed to `shared::compare`).
#[rstest]
// No explicit source column: the expected SQL reads "event"."original"; single capture.
#[case(
r#"
LET event.original = 'something'
| PARSE 'x * y' e1
"#,
r#"
SELECT
regexp_extract("event"."original", '(?s)x (.*?) y', 1) AS "e1",
"event"
FROM
(
SELECT
CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event"
)
WHERE
regexp_count("event"."original", '(?s)x (.*?) y') > 0
"#
)]
// Explicit source column `t1` with two captures, both selected afterwards.
#[case(
r#"
FROM table | PARSE t1 'x * y * z' AS e1, e2 | SELECT e1, e2
"#,
r#"
SELECT
"e1"
, "e2"
FROM (
SELECT
regexp_extract("t1", '(?s)x (.*?) y (.*?) z', 1) AS "e1"
, regexp_extract("t1", '(?s)x (.*?) y (.*?) z', 2) AS "e2"
, "t1"
, "t2"
FROM
"table"
WHERE
regexp_count("t1", '(?s)x (.*?) y (.*?) z') > 0
)
"#
)]
// Parsed columns are not selected downstream: expected SQL keeps only the
// regexp_count filter and has no regexp_extract projections.
#[case(
r#"
FROM table
| LET event.original = 'something'
| PARSE 'x * y * z' AS e1, e2
| SELECT t1
"#,
r#"SELECT "t1"
FROM
(
SELECT
CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event",
"t1",
"t2"
FROM
"table"
)
WHERE regexp_count( "event"."original", '(?s)x (.*?) y (.*?) z' ) > 0
"#
)]
// `DROP t2` ahead of PARSE: "t2" is absent from both projections in the expected SQL.
#[case(
r#"
FROM table
| LET event.original = 'something'
| DROP t2
| PARSE 'x * y * z' AS e1, e2
| SELECT e1, e2
"#,
r#"
SELECT "e1", "e2"
FROM
(
SELECT regexp_extract( "event"."original", '(?s)x (.*?) y (.*?) z', 1 ) AS "e1"
, regexp_extract( "event"."original", '(?s)x (.*?) y (.*?) z', 2 ) AS "e2"
, "event"
, "t1"
FROM
(
SELECT
CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event",
"t1"
FROM
"table"
)
WHERE regexp_count( "event"."original", '(?s)x (.*?) y (.*?) z' ) > 0
)
"#
)]
// Nested capture target `schnitzel.mit.pommes`: the outer projection rebuilds
// "schnitzel" as a nested ROW holding the second capture.
#[case(
r#"
FROM table
| LET event.original = 'something'
| LET schnitzel.mit = 1
| PARSE 'x * y * z' AS e1, schnitzel.mit.pommes
| SELECT schnitzel
"#,
r#"
SELECT
"schnitzel"
FROM (
SELECT
regexp_extract("event"."original", '(?s)x (.*?) y (.*?) z', 1) AS "e1"
, CAST(ROW(ROW(regexp_extract("event"."original", '(?s)x (.*?) y (.*?) z', 2))) AS ROW("mit" ROW("pommes" VARCHAR))) AS "schnitzel"
, "event"
, "t1"
, "t2"
FROM
(
SELECT
CAST(ROW(1) AS ROW("mit" BIGINT)) AS "schnitzel",
CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event",
"t1",
"t2"
FROM
"table"
)
WHERE
regexp_count("event"."original", '(?s)x (.*?) y (.*?) z') > 0
)
"#
)]
// NODROP with a parsed column selected: extracts are present, no WHERE filter.
#[case(
r#"
FROM table
| LET event.original = 'something'
| PARSE 'x * y * z' AS e1, e2 NODROP
| SELECT e1
"#,
r#"
SELECT
"e1"
FROM
(
SELECT regexp_extract( "event"."original", '(?s)x (.*?) y (.*?) z', 1 ) AS "e1"
, regexp_extract( "event"."original", '(?s)x (.*?) y (.*?) z', 2 ) AS "e2"
, "event"
, "t1"
, "t2"
FROM
(
SELECT
CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event",
"t1",
"t2"
FROM
"table"
)
)
"#
)]
// NODROP with no parsed column selected: expected SQL contains no regexp calls at all.
#[case(
r#"
FROM table
| LET event.original = 'something'
| PARSE 'x * y * z' AS e1, e2 NODROP
| SELECT t1
"#,
r#"
SELECT
"t1"
FROM
(
SELECT
CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event",
"t1",
"t2"
FROM
"table"
)
"#
)]
// Pattern `*-**`: expected regex is `(.*?)\-(.*)`, with the `-` escaped.
#[case(
r#"
LET event.original = 'something'
| PARSE '*-**' AS e_head, e_tail
"#,
r#"
SELECT
regexp_extract("event"."original", '(?s)(.*?)\-(.*)', 1 ) AS "e_head"
,regexp_extract("event"."original", '(?s)(.*?)\-(.*)', 2 ) AS "e_tail"
,"event"
FROM
(
SELECT
CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event"
)
WHERE
regexp_count("event"."original", '(?s)(.*?)\-(.*)') > 0
"#
)]
// `_` targets: only groups 1 and 5 are projected, under the named columns.
#[case(
r#"
LET event.original = 'something'
| PARSE '*-*-*-*-*' AS uuid_first, _, _, _, uuid_last
"#,
r#"
SELECT
regexp_extract("event"."original", '(?s)(.*?)\-(.*?)\-(.*?)\-(.*?)\-(.*)', 1 ) AS "uuid_first"
,regexp_extract("event"."original", '(?s)(.*?)\-(.*?)\-(.*?)\-(.*?)\-(.*)', 5 ) AS "uuid_last"
,"event"
FROM
(
SELECT
CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event"
)
WHERE
regexp_count("event"."original", '(?s)(.*?)\-(.*?)\-(.*?)\-(.*?)\-(.*)') > 0
"#
)]
// Literal `key=` anchors mixed with a `_` target: groups 1 and 3 are projected.
#[case(
r#"
LET event.original = 'something'
| PARSE 'app=* * user=*' AS app, _, user
"#,
r#"
SELECT
regexp_extract("event"."original", '(?s)app=(.*?) (.*?) user=(.*)', 1 ) AS "app"
,regexp_extract("event"."original", '(?s)app=(.*?) (.*?) user=(.*)', 3 ) AS "user"
,"event"
FROM
(
SELECT
CAST(ROW('something') AS ROW("original" VARCHAR)) AS "event"
)
WHERE
regexp_count("event"."original", '(?s)app=(.*?) (.*?) user=(.*)') > 0
"#
)]
pub fn test_parse(#[case] hamelin: String, #[case] expected: String) -> anyhow::Result<()> {
    // Catalog with a single data set, "table", exposing t1 (STRING) and t2 (INT).
    let provider = CatalogProvider::try_from(
        CatalogBuilder::default()
            .with_data_set(
                DataSetBuilder::new("table".into())
                    .with("t1", STRING)
                    .with("t2", INT),
            )
            .build(),
    )
    .unwrap();

    // Configure the compiler with that catalog, then freeze it behind an Arc.
    let compiler = {
        let mut configured = Compiler::new();
        configured.set_environment_provider(Arc::new(provider));
        Arc::new(configured)
    };

    // The comparison itself (and its Result) is delegated to the shared helper.
    shared::compare(compiler, hamelin, expected)
}
// Parametrized check that malformed `PARSE` commands are rejected.
// Each case is (Hamelin source, expected primary message, optional expected
// source description). Compilation runs on a compiler with no catalog attached.
#[rstest]
// The source column `t3` is not present in the environment.
#[case::unbound_column(
r#"
LET event.original = 'something'
| PARSE t3 'x * y' AS e1
"#,
"error while translating",
Some("unbound column reference: t3\nin environment {\n event: {original: string}\n}\n")
)]
// The pattern contains no `*` at all.
#[case::no_anchors(
r#"
LET event.original = 'something'
| PARSE 'x y' AS e1
"#,
"anchor pattern must contain at least one *",
None
)]
// One `*` in the pattern, two target columns.
#[case::too_many_columns(
r#"
LET event.original = 'something'
| PARSE 'x * y' AS e1, e2
"#,
"anchor pattern must contain a * for 2 new columns",
None
)]
// Two `*` in the pattern, one target column.
#[case::not_enough_columns(
r#"
LET event.original = 'something'
| PARSE 'x * y * z' AS e1
"#,
"anchor pattern must contain a * for 1 new columns",
None
)]
pub fn test_parse_errors(
    #[case] hamelin: String,
    #[case] expected_message: String,
    #[case] expected_source: Option<&str>,
) {
    let compiler = Compiler::new();

    // A successful compilation is itself a test failure.
    let failure = match compiler.compile_query(hamelin) {
        Err(failure) => failure,
        Ok(_) => panic!("Expected error"),
    };

    // Only the first reported error is inspected.
    let first = &failure.errors[0].error;
    assert_eq!(first.primary.message, expected_message);

    // The source description is compared only when the case supplies one.
    if let Some(expected_source) = expected_source {
        let actual_source = first.source_desc.clone().unwrap().to_string();
        assert_eq!(actual_source, expected_source);
    }
}