dw_transform/
split_url.rs

1use dw_models::Event;
2use serde_json::value::Value;
3
4/// Adds $protocol, $domain, $path and $params keys for events with an "url" key
5///
6/// But it only adds the generated field if it exists, for example if a url does not have a path
7/// the path value will not be set at all.
8///
9/// # Example
10/// ```ignore
11/// input:  {
12///           "data": {
13///             "url": "http://google.com/test"
14///           }
15///         }
16/// output: {
17///           "data": {
18///             "$domain": "google.com",
19///             "$path": "/test",
20///             "$protocol": "http"
21///           }
22///         }
23/// ```
24pub fn split_url_event(event: &mut Event) {
25    use rocket::http::uri::Absolute;
26    let uri_str = match event.data.get("url") {
27        None => return,
28        Some(val) => match val {
29            Value::String(s) => s.clone(),
30            _ => return,
31        },
32    };
33    let uri = match Absolute::parse(&uri_str) {
34        Ok(uri) => uri,
35        Err(_) => return,
36    };
37    // Protocol
38    let protocol = uri.scheme().to_string();
39    event
40        .data
41        .insert("$protocol".to_string(), Value::String(protocol));
42    // Domain
43    let domain = match uri.authority() {
44        Some(authority) => authority.host().trim_start_matches("www.").to_string(),
45        None => "".to_string(),
46    };
47    event
48        .data
49        .insert("$domain".to_string(), Value::String(domain));
50
51    // Path
52    let path = uri.path().to_string();
53    event.data.insert("$path".to_string(), Value::String(path));
54
55    // Params
56    let params = match uri.query() {
57        Some(query) => query.to_string(),
58        None => "".to_string(),
59    };
60    event
61        .data
62        .insert("$params".to_string(), Value::String(params));
63
64    // TODO: dw-server-python also has options and identifier
65}
66
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use chrono::DateTime;
    use chrono::Duration;
    use serde_json::json;

    use dw_models::Event;

    use super::split_url_event;

    /// A URL with a "www." host, a path and a query string is split into all four
    /// `$`-prefixed keys while the original "url" key is kept.
    #[test]
    fn test_split_url_events() {
        let start = DateTime::from_str("2000-01-01T00:00:01Z").unwrap();
        let mut event = Event {
            id: None,
            timestamp: start,
            duration: Duration::seconds(1),
            data: json_map! {"url": "http://www.google.com/path?query=1"},
        };

        split_url_event(&mut event);

        // The whole data map is compared, so any unexpected extra key fails the test.
        let expected = json_map! {
            "url": json!("http://www.google.com/path?query=1"),
            "$protocol": json!("http"),
            "$domain": json!("google.com"),
            "$path": json!("/path"),
            "$params": json!("query=1")
        };
        assert_eq!(event.data, expected);
    }
}