Skip to main content

glean_core/metrics/
url.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5use std::sync::Arc;
6
7use crate::common_metric_data::CommonMetricDataInternal;
8use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType};
9use crate::metrics::Metric;
10use crate::metrics::MetricType;
11use crate::util::truncate_string_at_boundary_with_error;
12use crate::Glean;
13use crate::{CommonMetricData, TestGetValue};
14
15// The maximum length a URL metric value may have, before encoding (the `UrlMetric` docs state this limit in bytes).
16const MAX_URL_LENGTH: usize = 8192;
17
18/// A URL metric.
19///
20/// Records a Unicode string value holding URL content.
21/// The URL is length-limited to `MAX_URL_LENGTH` bytes.
22#[derive(Clone, Debug)]
23pub struct UrlMetric {
24    meta: Arc<CommonMetricDataInternal>,
25}
26
27impl MetricType for UrlMetric {
28    fn meta(&self) -> &CommonMetricDataInternal {
29        &self.meta
30    }
31}
32
33// IMPORTANT:
34//
35// When changing this implementation, make sure all the operations are
36// also declared in the related trait in `../traits/`.
37impl UrlMetric {
38    /// Creates a new string metric.
39    pub fn new(meta: CommonMetricData) -> Self {
40        Self {
41            meta: Arc::new(meta.into()),
42        }
43    }
44
45    fn is_valid_url_scheme(&self, value: String) -> bool {
46        let mut splits = value.split(':');
47        if let Some(scheme) = splits.next() {
48            if scheme.is_empty() {
49                return false;
50            }
51            let mut chars = scheme.chars();
52            // The list of characters allowed in the scheme is on
53            // the spec here: https://url.spec.whatwg.org/#url-scheme-string
54            return chars.next().unwrap().is_ascii_alphabetic()
55                && chars.all(|c| c.is_ascii_alphanumeric() || ['+', '-', '.'].contains(&c));
56        }
57
58        // No ':' found, this is not valid :)
59        false
60    }
61
62    /// Sets to the specified stringified URL.
63    ///
64    /// # Arguments
65    ///
66    /// * `value` - The stringified URL to set the metric to.
67    ///
68    /// ## Notes
69    ///
70    /// Truncates the value if it is longer than `MAX_URL_LENGTH` bytes and logs an error.
71    pub fn set<S: Into<String>>(&self, value: S) {
72        let value = value.into();
73        let metric = self.clone();
74        crate::launch_with_glean(move |glean| metric.set_sync(glean, value))
75    }
76
77    /// Sets to the specified stringified URL synchronously.
78    #[doc(hidden)]
79    pub fn set_sync<S: Into<String>>(&self, glean: &Glean, value: S) {
80        if !self.should_record(glean) {
81            return;
82        }
83
84        let s = truncate_string_at_boundary_with_error(glean, &self.meta, value, MAX_URL_LENGTH);
85
86        if s.starts_with("data:") {
87            record_error(
88                glean,
89                &self.meta,
90                ErrorType::InvalidValue,
91                "URL metric does not support data URLs.",
92                None,
93            );
94            return;
95        }
96
97        if !self.is_valid_url_scheme(s.clone()) {
98            let msg = format!("\"{}\" does not start with a valid URL scheme.", s);
99            record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None);
100            return;
101        }
102
103        let value = Metric::Url(s);
104        glean.storage().record(glean, &self.meta, &value)
105    }
106
107    #[doc(hidden)]
108    pub(crate) fn get_value<'a, S: Into<Option<&'a str>>>(
109        &self,
110        glean: &Glean,
111        ping_name: S,
112    ) -> Option<String> {
113        let queried_ping_name = ping_name
114            .into()
115            .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]);
116
117        match glean.storage().get_metric(self.meta(), queried_ping_name) {
118            Some(Metric::Url(s)) => Some(s),
119            _ => None,
120        }
121    }
122
123    /// **Exported for test purposes.**
124    ///
125    /// Gets the number of recorded errors for the given metric and error type.
126    ///
127    /// # Arguments
128    ///
129    /// * `error` - The type of error
130    ///
131    /// # Returns
132    ///
133    /// The number of errors reported.
134    pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 {
135        crate::block_on_dispatcher();
136
137        crate::core::with_glean(|glean| {
138            test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0)
139        })
140    }
141}
142
143impl TestGetValue for UrlMetric {
144    type Output = String;
145
146    /// **Test-only API (exported for FFI purposes).**
147    ///
148    /// Gets the currently stored value as a string.
149    ///
150    /// This doesn't clear the stored value.
151    ///
152    /// # Arguments
153    ///
154    /// * `ping_name` - the optional name of the ping to retrieve the metric
155    ///                 for. Defaults to the first value in `send_in_pings`.
156    ///
157    /// # Returns
158    ///
159    /// The stored value or `None` if nothing stored.
160    fn test_get_value(&self, ping_name: Option<String>) -> Option<String> {
161        crate::block_on_dispatcher();
162        crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref()))
163    }
164}
165
#[cfg(test)]
mod test {
    use super::*;
    use crate::tests::new_glean;
    use crate::Lifetime;

    /// Builds the application-lifetime `test.url_metric` metric sent in
    /// `store1`, with all remaining fields taken from `Default`.
    fn default_url_metric() -> UrlMetric {
        UrlMetric::new(CommonMetricData {
            name: "url_metric".into(),
            category: "test".into(),
            send_in_pings: vec!["store1".into()],
            lifetime: Lifetime::Application,
            ..Default::default()
        })
    }

    #[test]
    fn payload_is_correct() {
        let (glean, _t) = new_glean(None);
        let metric = default_url_metric();

        let sample_url = String::from("glean://test");
        metric.set_sync(&glean, sample_url.clone());

        let stored = metric.get_value(&glean, "store1").unwrap();
        assert_eq!(sample_url, stored);
    }

    #[test]
    fn does_not_record_url_exceeding_maximum_length() {
        let (glean, _t) = new_glean(None);
        let metric = default_url_metric();

        // Despite this test's name, an over-long URL is still recorded: it is
        // truncated to MAX_URL_LENGTH and an overflow error is reported.
        //
        // An 8-character path segment is used so that, together with the
        // 8-character "glean://" prefix, a whole number of repetitions lands
        // exactly on MAX_URL_LENGTH.
        let segment = "abcdefgh";

        // 2000 repetitions give a URL of more than 16000 characters, well over
        // MAX_URL_LENGTH.
        let oversized_url = format!("glean://{}", segment.repeat(2000));
        metric.set_sync(&glean, oversized_url);

        // "glean://" (8) + 1023 * "abcdefgh" (8184) = 8192 = MAX_URL_LENGTH.
        let expected = format!("glean://{}", segment.repeat(1023));
        let stored = metric.get_value(&glean, "store1").unwrap();
        assert_eq!(stored, expected);

        let overflow_errors =
            test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidOverflow)
                .unwrap();
        assert_eq!(1, overflow_errors);
    }

    #[test]
    fn does_not_record_data_urls() {
        let (glean, _t) = new_glean(None);

        // This test spells out every field instead of relying on `Default`.
        let metric = UrlMetric::new(CommonMetricData {
            name: "url_metric".into(),
            category: "test".into(),
            send_in_pings: vec!["store1".into()],
            lifetime: Lifetime::Application,
            disabled: false,
            label: None,
            in_session: false,
        });

        metric.set_sync(&glean, "data:application/json");

        // Nothing is stored, and exactly one error is reported.
        assert!(metric.get_value(&glean, "store1").is_none());

        let invalid_value_errors =
            test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue).unwrap();
        assert_eq!(1, invalid_value_errors);
    }

    #[test]
    fn url_validation_works_and_records_errors() {
        let (glean, _t) = new_glean(None);
        let metric = default_url_metric();

        let incorrects = vec![
            "",
            // Scheme may only start with upper or lowercase ASCII alpha[^1] character.
            // [1]: https://infra.spec.whatwg.org/#ascii-alpha
            "1glean://test",
            "-glean://test",
            // Scheme may only have ASCII alphanumeric characters or the `-`, `.`, `+` characters.
            "шеллы://test",
            "g!lean://test",
            "g=lean://test",
            // Scheme must be followed by `:` character.
            "glean//test",
        ];

        let corrects = vec![
            // The minimum URL
            "g:",
            // Empty body is fine
            "glean://",
            // "//" is actually not even necessary
            "glean:",
            "glean:test",
            "glean:test.com",
            // Scheme may only have ASCII alphanumeric characters or the `-`, `.`, `+` characters.
            "g-lean://test",
            "g+lean://test",
            "g.lean://test",
            // Query parameters are fine
            "glean://test?hello=world",
            // Finally, some actual real world URLs
            "https://infra.spec.whatwg.org/#ascii-alpha",
            "https://infra.spec.whatwg.org/#ascii-alpha?test=for-glean",
        ];

        // Rejected values must never reach storage.
        for rejected in incorrects.iter().copied() {
            metric.set_sync(&glean, rejected);
            assert!(metric.get_value(&glean, "store1").is_none());
        }

        // Each rejected value accounts for exactly one `InvalidValue` error.
        let invalid_value_errors =
            test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue).unwrap()
                as usize;
        assert_eq!(incorrects.len(), invalid_value_errors);

        // Accepted values are stored verbatim, each replacing the previous one.
        for accepted in corrects {
            metric.set_sync(&glean, accepted);
            assert_eq!(metric.get_value(&glean, "store1").unwrap(), accepted);
        }
    }
}