// vrl 0.32.0
//
// Vector Remap Language
// Documentation
use crate::compiler::prelude::*;

/// Splits `value` on every match of `pattern`, yielding at most `limit` pieces.
///
/// * `value` – the input to split, read via `try_bytes_utf8_lossy`.
/// * `limit` – an integer upper bound on the number of pieces. Once the bound
///   is reached the rest of the input is returned unsplit as the last piece.
///   Zero and negative limits produce an empty array.
/// * `pattern` – either a regex or a byte string; a byte string is converted
///   with `String::from_utf8_lossy` before being used as the separator.
///
/// # Errors
///
/// Propagates any error from `try_bytes_utf8_lossy` or `try_integer`, and
/// returns `ValueError::Expected` when `pattern` is neither a regex nor bytes.
fn split(value: &Value, limit: Value, pattern: Value) -> Resolved {
    let string = value.try_bytes_utf8_lossy()?;

    // Negative limits are clamped to zero. A limit that does not fit in
    // `usize` (possible where `usize` is narrower than the integer type)
    // saturates at `usize::MAX`; a plain `as usize` cast would silently wrap
    // it to an unrelated, possibly tiny, value.
    let limit = match limit.try_integer()? {
        n if n < 0 => 0,
        n => <usize as std::convert::TryFrom<_>>::try_from(n).unwrap_or(usize::MAX),
    };

    match pattern {
        Value::Regex(pattern) => Ok(pattern
            .splitn(string.as_ref(), limit)
            .collect::<Vec<_>>()
            .into()),
        Value::Bytes(bytes) => {
            // Invalid UTF-8 in the separator is replaced rather than rejected.
            let pattern = String::from_utf8_lossy(&bytes);

            Ok(string
                .splitn(limit, pattern.as_ref())
                .collect::<Vec<_>>()
                .into())
        }
        value => Err(ValueError::Expected {
            got: value.kind(),
            expected: Kind::regex() | Kind::bytes(),
        }
        .into()),
    }
}

/// The `split` function: splits a string on a pattern, optionally capping the
/// number of pieces returned.
#[derive(Clone, Copy, Debug)]
pub struct Split;

impl Function for Split {
    /// Name under which the function is invoked.
    fn identifier(&self) -> &'static str {
        "split"
    }

    /// One-sentence summary of what the function does.
    fn usage(&self) -> &'static str {
        "Splits the `value` string using `pattern`."
    }

    /// Category the function is filed under (string functions).
    fn category(&self) -> &'static str {
        Category::String.as_ref()
    }

    /// Kind of the returned value: an array.
    fn return_kind(&self) -> u16 {
        kind::ARRAY
    }

    /// Notes describing how the returned value is shaped.
    fn return_rules(&self) -> &'static [&'static str] {
        const RULES: &[&str] = &[
            "If `limit` is specified, the remainder of the string is returned unsplit after `limit` has been reached.",
        ];
        RULES
    }

    /// The accepted arguments: required `value` and `pattern`, optional `limit`.
    fn parameters(&self) -> &'static [Parameter] {
        const SPLIT_PARAMETERS: &[Parameter] = &[
            Parameter::required("value", kind::BYTES, "The string to split."),
            Parameter::required(
                "pattern",
                kind::BYTES | kind::REGEX,
                "The string is split whenever this pattern is matched.",
            ),
            Parameter::optional(
                "limit",
                kind::INTEGER,
                "The maximum number of substrings to return.",
            ),
        ];
        SPLIT_PARAMETERS
    }

    /// Example invocations together with their expected results.
    fn examples(&self) -> &'static [Example] {
        &[
            example! {
                title: "Split a string (no limit)",
                source: r#"split("apples and pears and bananas", " and ")"#,
                result: Ok(r#"["apples", "pears", "bananas"]"#),
            },
            example! {
                title: "Split a string (with a limit)",
                source: r#"split("apples and pears and bananas", " and ", limit: 2)"#,
                result: Ok(r#"["apples", "pears and bananas"]"#),
            },
            example! {
                title: "Split string",
                source: r#"split("foobar", "b")"#,
                result: Ok(r#"["foo", "ar"]"#),
            },
            example! {
                title: "Split regex",
                source: r#"split("barbaz", r'ba')"#,
                result: Ok(r#"["", "r", "z"]"#),
            },
        ]
    }

    /// Builds the runtime expression for a `split` call from its arguments.
    fn compile(
        &self,
        _state: &state::TypeState,
        _ctx: &mut FunctionCompileContext,
        arguments: ArgumentList,
    ) -> Compiled {
        let value = arguments.required("value");
        let pattern = arguments.required("pattern");

        // With no explicit `limit`, fall back to a very large bound
        // (presumably chosen so that it never cuts a real split short).
        let limit = match arguments.optional("limit") {
            Some(limit) => limit,
            None => expr!(999_999_999),
        };

        let function = SplitFn {
            value,
            pattern,
            limit,
        };
        Ok(function.as_expr())
    }
}

/// Compiled form of a `split` call: the argument expressions that are
/// resolved each time the function runs.
#[derive(Debug, Clone)]
pub(crate) struct SplitFn {
    /// Expression producing the string to split.
    value: Box<dyn Expression>,
    /// Expression producing the separator (bytes or regex).
    pattern: Box<dyn Expression>,
    /// Expression producing the maximum number of pieces; `compile` supplies
    /// `999_999_999` when the caller omits `limit`.
    limit: Box<dyn Expression>,
}

impl FunctionExpression for SplitFn {
    /// Resolves the three argument expressions and performs the split.
    ///
    /// The expressions are resolved in the order `value`, `limit`, `pattern`;
    /// the first one that fails aborts the call with its error.
    fn resolve(&self, ctx: &mut Context) -> Resolved {
        // Tuple fields are evaluated left to right, which keeps that order.
        let (input, max_parts, separator) = (
            self.value.resolve(ctx)?,
            self.limit.resolve(ctx)?,
            self.pattern.resolve(ctx)?,
        );

        split(&input, max_parts, separator)
    }

    /// Declares the result as an infallible array whose elements are bytes.
    fn type_def(&self, _: &state::TypeState) -> TypeDef {
        let elements = Collection::from_unknown(Kind::bytes());
        TypeDef::array(elements).infallible()
    }
}

#[cfg(test)]
// The regex patterns below are deliberately simple literals.
#[allow(clippy::trivial_regex)]
mod test {
    use super::*;
    use crate::value;

    test_function![
        split => Split;

        // An empty input still yields one (empty) piece.
        empty {
            args: func_args![
                value: "",
                pattern: " "
            ],
            want: Ok(value!([""])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // No separator in the input: the input comes back as the only piece.
        single {
            args: func_args![
                value: "foo",
                pattern: " "
            ],
            want: Ok(value!(["foo"])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Plain string separator, no limit.
        long {
            args: func_args![
                value: "This is a long string.",
                pattern: " "
            ],
            want: Ok(value!(["This", "is", "a", "long", "string."])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Regex separator combined with a limit.
        regex {
            args: func_args![
                value: "This is a long string",
                pattern: Value::Regex(regex::Regex::new(" ").unwrap().into()),
                limit: 2
            ],
            want: Ok(value!(["This", "is a long string"])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Case-insensitive regex separator: splits on both `a` and `A`.
        non_space {
            args: func_args![
                value: "ThisaisAlongAstring.",
                pattern: Value::Regex(regex::Regex::new("(?i)a").unwrap().into())
            ],
            want: Ok(value!(["This", "is", "long", "string."])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Multi-byte characters are kept intact in the pieces.
        unicode {
            args: func_args![
                value: "˙ƃuᴉɹʇs ƃuol ɐ sᴉ sᴉɥ┴",
                pattern: " "
            ],
            want: Ok(value!(["˙ƃuᴉɹʇs", "ƃuol", "ɐ", "sᴉ", "sᴉɥ┴"])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // The remainder after the limit is returned unsplit.
        limit {
            args: func_args![
                value: "This is a long string.",
                pattern: " ",
                limit: 2
            ],
            want: Ok(value!(["This", "is a long string."])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // A limit larger than the number of pieces has no effect.
        over_length_limit {
            args: func_args![
                value: "This is a long string.",
                pattern: " ",
                limit: 2000
            ],
            want: Ok(value!(["This", "is", "a", "long", "string."])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // A limit of zero produces no pieces at all.
        zero_limit {
            args: func_args![
                value: "This is a long string.",
                pattern: " ",
                limit: 0
            ],
            want: Ok(value!([])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }

        // Negative limits behave like zero.
        negative_limit {
            args: func_args![
                value: "This is a long string.",
                pattern: " ",
                limit: -1
            ],
            want: Ok(value!([])),
            tdef: TypeDef::array(Collection::from_unknown(Kind::bytes())),
        }
    ];
}