vrl 0.32.0

Vector Remap Language
Documentation
use crate::compiler::prelude::*;
use std::sync::LazyLock;
use xxhash_rust::{xxh3, xxh32, xxh64};

static DEFAULT_VARIANT: LazyLock<Value> = LazyLock::new(|| Value::Bytes(Bytes::from("XXH32")));

const VALID_VARIANTS: &[&str] = &["XXH32", "XXH64", "XXH3-64", "XXH3-128"];

static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
    vec![
        Parameter::required(
            "value",
            kind::BYTES,
            "The string to calculate the hash for.",
        ),
        Parameter::optional(
            "variant",
            kind::BYTES,
            "The xxHash hashing algorithm to use.",
        )
        .default(&DEFAULT_VARIANT),
    ]
});

#[allow(clippy::cast_possible_wrap)]
fn xxhash(value: Value, variant: &Value) -> Resolved {
    let bytes = value.try_bytes()?;
    let variant = variant.try_bytes_utf8_lossy()?.as_ref().to_uppercase();

    match variant.as_str() {
        "XXH32" => {
            let result = xxh32::xxh32(&bytes, 0);
            Ok(Value::from(i64::from(result)))
        }
        "XXH64" => {
            let result = xxh64::xxh64(&bytes, 0);
            Ok(Value::from(result as i64))
        }
        "XXH3-64" => {
            let result = xxh3::xxh3_64(&bytes);
            Ok(Value::from(result as i64))
        }
        "XXH3-128" => {
            let result = xxh3::xxh3_128(&bytes);
            // Convert u128 to string representation since VRL doesn't have native u128 support
            Ok(Value::from(result.to_string()))
        }
        _ => Err("Variant must be either 'XXH32', 'XXH64', 'XXH3-64', or 'XXH3-128'".into()),
    }
}

#[derive(Clone, Copy, Debug)]
pub struct Xxhash;

impl Function for Xxhash {
    fn identifier(&self) -> &'static str {
        "xxhash"
    }

    fn summary(&self) -> &'static str {
        "calculate xxhash hash"
    }

    fn usage(&self) -> &'static str {
        "Calculates a [xxHash](https://github.com/DoumanAsh/xxhash-rust) hash of the `value`."
    }

    fn category(&self) -> &'static str {
        Category::Checksum.as_ref()
    }

    fn return_kind(&self) -> u16 {
        kind::INTEGER | kind::BYTES
    }

    fn parameters(&self) -> &'static [Parameter] {
        PARAMETERS.as_slice()
    }

    fn notices(&self) -> &'static [&'static str] {
        &[indoc! {"
            Due to limitations in the underlying VRL data types, this function converts the unsigned
            64-bit integer hash result to a signed 64-bit integer. Results higher than the signed
            64-bit integer maximum value wrap around to negative values. For the XXH3-128 hash
            algorithm, values are returned as a string.
        "}]
    }

    fn examples(&self) -> &'static [Example] {
        &[
            example! {
                title: "Calculate a hash using the default (XXH32) algorithm",
                source: r#"xxhash("foo")"#,
                result: Ok("3792637401"),
            },
            example! {
                title: "Calculate a hash using the XXH32 algorithm",
                source: r#"xxhash("foo", "XXH32")"#,
                result: Ok("3792637401"),
            },
            example! {
                title: "Calculate a hash using the XXH64 algorithm",
                source: r#"xxhash("foo", "XXH64")"#,
                result: Ok("3728699739546630719"),
            },
            example! {
                title: "Calculate a hash using the XXH3-64 algorithm",
                source: r#"xxhash("foo", "XXH3-64")"#,
                result: Ok("-6093828362558603894"),
            },
            example! {
                title: "Calculate a hash using the XXH3-128 algorithm",
                source: r#"xxhash("foo", "XXH3-128")"#,
                result: Ok(r#""161745101148472925293886522910304009610""#),
            },
        ]
    }

    fn compile(
        &self,
        _: &state::TypeState,
        _ctx: &mut FunctionCompileContext,
        arguments: ArgumentList,
    ) -> Compiled {
        let value = arguments.required("value");
        let variant = arguments.optional("variant");

        Ok(XxhashFn { value, variant }.as_expr())
    }
}

#[derive(Debug, Clone)]
struct XxhashFn {
    value: Box<dyn Expression>,
    variant: Option<Box<dyn Expression>>,
}

impl FunctionExpression for XxhashFn {
    fn resolve(&self, ctx: &mut Context) -> Resolved {
        let value = self.value.resolve(ctx)?;
        let variant = self
            .variant
            .map_resolve_with_default(ctx, || DEFAULT_VARIANT.clone())?;

        xxhash(value, &variant)
    }

    fn type_def(&self, state: &state::TypeState) -> TypeDef {
        let variant = self.variant.as_ref();
        let valid_static_variant = variant.is_none()
            || variant
                .and_then(|variant| variant.resolve_constant(state))
                .and_then(|variant| variant.try_bytes_utf8_lossy().map(|s| s.to_string()).ok())
                .is_some_and(|variant| VALID_VARIANTS.contains(&variant.to_uppercase().as_str()));

        if valid_static_variant {
            TypeDef::bytes().infallible()
        } else {
            TypeDef::bytes().fallible()
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::value;

    test_function![
        xxhash => Xxhash;

    hash_xxh32_default {
        args: func_args![value: "foo"],
        want: Ok(value!(3_792_637_401_i64)),
        tdef: TypeDef::bytes().infallible(),
    }

    hash_xxh32 {
        args: func_args![value: "foo", variant: "XXH32"],
        want: Ok(value!(3_792_637_401_i64)),
        tdef: TypeDef::bytes().infallible(),
    }

    hash_xxh64 {
        args: func_args![value: "foo", variant: "XXH64"],
        want: Ok(value!(3_728_699_739_546_630_719_i64)),
        tdef: TypeDef::bytes().infallible(),
    }

    hash_xxh3_64 {
        args: func_args![value: "foo", variant: "XXH3-64"],
        want: Ok(value!(-6_093_828_362_558_603_894_i64)),
        tdef: TypeDef::bytes().infallible(),
    }

    hash_xxh3_128 {
        args: func_args![value: "foo", variant: "XXH3-128"],
        want: Ok(value!("161745101148472925293886522910304009610")),
        tdef: TypeDef::bytes().infallible(),
    }

    long_string_xxh32 {
        args: func_args![value: "vrl xxhash hash function"],
        want: Ok(value!(919_261_294_i64)),
        tdef: TypeDef::bytes().infallible(),
    }

    long_string_xxh64 {
        args: func_args![value: "vrl xxhash hash function", variant: "XXH64"],
        want: Ok(value!(7_826_295_616_420_964_813_i64)),
        tdef: TypeDef::bytes().infallible(),
    }

    long_string_xxh3_64 {
        args: func_args![value: "vrl xxhash hash function", variant: "XXH3-64"],
        want: Ok(value!(-7_714_906_473_624_552_998_i64)),
        tdef: TypeDef::bytes().infallible(),
    }

    long_string_xxh3_128 {
        args: func_args![value: "vrl xxhash hash function", variant: "XXH3-128"],
        want: Ok(value!("89621485359950851650871997518391357172")),
        tdef: TypeDef::bytes().infallible(),
    }

    hash_invalid_variant {
        args: func_args![value: "foo", variant: "XXH16"],
        want: Err("Variant must be either 'XXH32', 'XXH64', 'XXH3-64', or 'XXH3-128'"),
        tdef: TypeDef::bytes().fallible(),
    }
    ];
}