vrl 0.32.0

Vector Remap Language
Documentation
use crate::compiler::prelude::*;
use std::collections::BTreeMap;
use std::sync::LazyLock;
use url::Url;

/// Default value (`false`) for the optional `default_known_ports` argument of `parse_url`.
static DEFAULT_DEFAULT_KNOWN_PORTS: LazyLock<Value> = LazyLock::new(|| Value::Boolean(false));

/// Parameters accepted by `parse_url`: the required `value` (bytes) and the optional
/// `default_known_ports` flag (boolean, defaulting to `DEFAULT_DEFAULT_KNOWN_PORTS`).
static PARAMETERS: LazyLock<Vec<Parameter>> = LazyLock::new(|| {
    let url_text = Parameter::required("value", kind::BYTES, "The text of the URL.");

    // The description literal contains hard line breaks; its continuation lines stay
    // flush-left so that no indentation ends up inside the string.
    let known_ports_flag = Parameter::optional(
        "default_known_ports",
        kind::BOOLEAN,
        "If true and the port number is not specified in the input URL
string (or matches the default port for the scheme), it is
populated from well-known ports for the following schemes:
`http`, `https`, `ws`, `wss`, and `ftp`.",
    )
    .default(&DEFAULT_DEFAULT_KNOWN_PORTS);

    vec![url_text, known_ports_flag]
});

/// The `parse_url` function: parses a URL string into an object of its components.
#[derive(Clone, Copy, Debug)]
pub struct ParseUrl;

impl Function for ParseUrl {
    /// Name under which the function is exposed.
    fn identifier(&self) -> &'static str {
        "parse_url"
    }

    /// One-line usage description of the function.
    fn usage(&self) -> &'static str {
        "Parses the `value` in [URL](https://en.wikipedia.org/wiki/URL) format."
    }

    /// The function belongs to the `Parse` category.
    fn category(&self) -> &'static str {
        Category::Parse.as_ref()
    }

    /// Reasons the function can fail at runtime.
    fn internal_failure_reasons(&self) -> &'static [&'static str] {
        &["`value` is not a properly formatted URL."]
    }

    /// The function returns an object.
    fn return_kind(&self) -> u16 {
        kind::OBJECT
    }

    /// Parameters accepted by the function, backed by the `PARAMETERS` static.
    fn parameters(&self) -> &'static [Parameter] {
        PARAMETERS.as_slice()
    }

    /// Usage examples: one with an explicit port, one relying on `default_known_ports`.
    fn examples(&self) -> &'static [Example] {
        &[
            example! {
                title: "Parse URL",
                source: r#"parse_url!("ftp://foo:bar@example.com:4343/foobar?hello=world#123")"#,
                result: Ok(indoc! {r#"
                {
                    "fragment": "123",
                    "host": "example.com",
                    "password": "bar",
                    "path": "/foobar",
                    "port": 4343,
                    "query": {
                        "hello": "world"
                    },
                    "scheme": "ftp",
                    "username": "foo"
                }
            "#}),
            },
            example! {
                title: "Parse URL with default port",
                source: r#"parse_url!("https://example.com", default_known_ports: true)"#,
                result: Ok(indoc! {r#"
                {
                    "fragment": null,
                    "host": "example.com",
                    "password": "",
                    "path": "/",
                    "port": 443,
                    "query": {},
                    "scheme": "https",
                    "username": ""
                }
            "#}),
            },
        ]
    }

    /// Builds the runtime expression for a call: takes the required `value` argument and
    /// the optional `default_known_ports` argument and wraps them in a `ParseUrlFn`.
    fn compile(
        &self,
        _state: &state::TypeState,
        _ctx: &mut FunctionCompileContext,
        arguments: ArgumentList,
    ) -> Compiled {
        let value = arguments.required("value");
        // `None` when the caller did not pass the flag; the default is applied in `resolve`.
        let default_known_ports = arguments.optional("default_known_ports");

        Ok(ParseUrlFn {
            value,
            default_known_ports,
        }
        .as_expr())
    }
}

/// Compiled form of a `parse_url` call, holding the argument expressions that are
/// evaluated each time the call is resolved.
#[derive(Debug, Clone)]
struct ParseUrlFn {
    /// Expression producing the URL text.
    value: Box<dyn Expression>,
    /// Expression for the `default_known_ports` flag; `None` when the argument was omitted,
    /// in which case `DEFAULT_DEFAULT_KNOWN_PORTS` is used.
    default_known_ports: Option<Box<dyn Expression>>,
}

impl FunctionExpression for ParseUrlFn {
    /// Evaluates both arguments, parses the URL text and returns its components as an object.
    ///
    /// # Errors
    ///
    /// Propagates any error from resolving or coercing the arguments, and returns
    /// `unable to parse url: <reason>` when the text is rejected by `Url::parse`.
    fn resolve(&self, ctx: &mut Context) -> Resolved {
        // The resolved value must outlive `text`, which may borrow from it.
        let input = self.value.resolve(ctx)?;
        let text = input.try_bytes_utf8_lossy()?;

        let flag = self
            .default_known_ports
            .map_resolve_with_default(ctx, || DEFAULT_DEFAULT_KNOWN_PORTS.clone())?;
        let fill_known_ports = flag.try_boolean()?;

        match Url::parse(&text) {
            Ok(url) => Ok(url_to_value(&url, fill_known_ports)),
            Err(err) => Err(format!("unable to parse url: {err}").into()),
        }
    }

    /// The call is fallible and yields an object whose fields are described by `inner_kind`.
    fn type_def(&self, _: &state::TypeState) -> TypeDef {
        let fields = inner_kind();
        TypeDef::object(fields).fallible()
    }
}

fn url_to_value(url: &Url, default_known_ports: bool) -> Value {
    let mut map = BTreeMap::<&str, Value>::new();

    map.insert("scheme", url.scheme().to_owned().into());
    map.insert("username", url.username().to_owned().into());
    map.insert(
        "password",
        url.password()
            .map(ToOwned::to_owned)
            .unwrap_or_default()
            .into(),
    );
    map.insert("path", url.path().to_owned().into());
    map.insert("host", url.host_str().map(ToOwned::to_owned).into());

    let port = if default_known_ports {
        url.port_or_known_default()
    } else {
        url.port()
    };
    map.insert("port", port.into());
    map.insert("fragment", url.fragment().map(ToOwned::to_owned).into());
    map.insert(
        "query",
        url.query_pairs()
            .into_owned()
            .map(|(k, v)| (k.into(), v.into()))
            .collect::<ObjectMap>()
            .into(),
    );

    map.into_iter()
        .map(|(k, v)| (k.to_owned(), v))
        .collect::<Value>()
}

/// Describes the kind of every field in the object produced by `url_to_value`.
///
/// A field is nullable whenever `url_to_value` converts an `Option` from the `url` crate,
/// so the declared type never excludes a value that can appear at runtime.
fn inner_kind() -> BTreeMap<Field, Kind> {
    BTreeMap::from([
        ("scheme".into(), Kind::bytes()),
        ("username".into(), Kind::bytes()),
        ("password".into(), Kind::bytes()),
        // NOTE(review): `url_to_value` always emits a string for `path`; the nullable kind
        // is kept so that programs already handling a null path keep compiling.
        ("path".into(), Kind::bytes().or_null()),
        // `Url::host_str` is `None` for URLs without a host (e.g. `mailto:` or `data:`),
        // which `url_to_value` emits as null, so the kind has to admit null.
        ("host".into(), Kind::bytes().or_null()),
        ("port".into(), Kind::integer().or_null()),
        ("fragment".into(), Kind::bytes().or_null()),
        (
            "query".into(),
            Kind::object(Collection::from_unknown(Kind::bytes())),
        ),
    ])
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::value;

    // Every case also checks that the type definition is the fallible object
    // described by `inner_kind`.
    test_function![
        parse_url => ParseUrl;

        // No port in the input and `default_known_ports` left at its default: port is null.
        https {
            args: func_args![value: value!("https://vector.dev")],
            want: Ok(value!({
                fragment: (),
                host: "vector.dev",
                password: "",
                path: "/",
                port: (),
                query: {},
                scheme: "https",
                username: "",
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // An explicitly written scheme-default port (443 for https) is still expected
        // to come back as null when `default_known_ports` is off.
        default_port_specified {
            args: func_args![value: value!("https://vector.dev:443")],
            want: Ok(value!({
                fragment: (),
                host: "vector.dev",
                password: "",
                path: "/",
                port: (),
                query: {},
                scheme: "https",
                username: "",
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // With `default_known_ports: true` the well-known https port is filled in.
        default_port {
            args: func_args![value: value!("https://vector.dev"), default_known_ports: true],
            want: Ok(value!({
                fragment: (),
                host: "vector.dev",
                password: "",
                path: "/",
                port: 443_i64,
                query: {},
                scheme: "https",
                username: "",
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // A non-ASCII host is expected back in its punycode (`xn--`) form.
        punycode {
            args: func_args![value: value!("https://www.café.com")],
            want: Ok(value!({
                fragment: (),
                host: "www.xn--caf-dma.com",
                password: "",
                path: "/",
                port: (),
                query: {},
                scheme: "https",
                username: "",
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }

        // Same as above, but the host is also expected to be lower-cased.
        punycode_mixed_case {
            args: func_args![value: value!("https://www.CAFé.com")],
            want: Ok(value!({
                fragment: (),
                host: "www.xn--caf-dma.com",
                password: "",
                path: "/",
                port: (),
                query: {},
                scheme: "https",
                username: "",
            })),
            tdef: TypeDef::object(inner_kind()).fallible(),
        }
    ];
}