glean_core/metrics/
url.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5use std::sync::Arc;
6
7use crate::common_metric_data::CommonMetricDataInternal;
8use crate::error_recording::{record_error, test_get_num_recorded_errors, ErrorType};
9use crate::metrics::Metric;
10use crate::metrics::MetricType;
11use crate::storage::StorageManager;
12use crate::util::truncate_string_at_boundary_with_error;
13use crate::Glean;
14use crate::{CommonMetricData, TestGetValue};
15
// The maximum length a URL metric value may have, before encoding.
// `UrlMetric::set_sync` passes this limit to `truncate_string_at_boundary_with_error`,
// so longer values are cut down to it before being stored.
// NOTE(review): this comment used to say "characters", while the `UrlMetric` and
// `set` docs state the limit in bytes -- confirm the unit against
// `truncate_string_at_boundary_with_error`.
const MAX_URL_LENGTH: usize = 8192;
18
/// A URL metric.
///
/// Records a Unicode string value holding the content of a URL.
/// The URL is length-limited to `MAX_URL_LENGTH` bytes.
#[derive(Clone, Debug)]
pub struct UrlMetric {
    // Metric metadata, kept behind an `Arc` so that clones of the metric
    // (such as the one `set` moves into its dispatched closure) share it.
    meta: Arc<CommonMetricDataInternal>,
}
27
28impl MetricType for UrlMetric {
29    fn meta(&self) -> &CommonMetricDataInternal {
30        &self.meta
31    }
32}
33
34// IMPORTANT:
35//
36// When changing this implementation, make sure all the operations are
37// also declared in the related trait in `../traits/`.
38impl UrlMetric {
39    /// Creates a new string metric.
40    pub fn new(meta: CommonMetricData) -> Self {
41        Self {
42            meta: Arc::new(meta.into()),
43        }
44    }
45
46    fn is_valid_url_scheme(&self, value: String) -> bool {
47        let mut splits = value.split(':');
48        if let Some(scheme) = splits.next() {
49            if scheme.is_empty() {
50                return false;
51            }
52            let mut chars = scheme.chars();
53            // The list of characters allowed in the scheme is on
54            // the spec here: https://url.spec.whatwg.org/#url-scheme-string
55            return chars.next().unwrap().is_ascii_alphabetic()
56                && chars.all(|c| c.is_ascii_alphanumeric() || ['+', '-', '.'].contains(&c));
57        }
58
59        // No ':' found, this is not valid :)
60        false
61    }
62
63    /// Sets to the specified stringified URL.
64    ///
65    /// # Arguments
66    ///
67    /// * `value` - The stringified URL to set the metric to.
68    ///
69    /// ## Notes
70    ///
71    /// Truncates the value if it is longer than `MAX_URL_LENGTH` bytes and logs an error.
72    pub fn set<S: Into<String>>(&self, value: S) {
73        let value = value.into();
74        let metric = self.clone();
75        crate::launch_with_glean(move |glean| metric.set_sync(glean, value))
76    }
77
78    /// Sets to the specified stringified URL synchronously.
79    #[doc(hidden)]
80    pub fn set_sync<S: Into<String>>(&self, glean: &Glean, value: S) {
81        if !self.should_record(glean) {
82            return;
83        }
84
85        let s = truncate_string_at_boundary_with_error(glean, &self.meta, value, MAX_URL_LENGTH);
86
87        if s.starts_with("data:") {
88            record_error(
89                glean,
90                &self.meta,
91                ErrorType::InvalidValue,
92                "URL metric does not support data URLs.",
93                None,
94            );
95            return;
96        }
97
98        if !self.is_valid_url_scheme(s.clone()) {
99            let msg = format!("\"{}\" does not start with a valid URL scheme.", s);
100            record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None);
101            return;
102        }
103
104        let value = Metric::Url(s);
105        glean.storage().record(glean, &self.meta, &value)
106    }
107
108    #[doc(hidden)]
109    pub(crate) fn get_value<'a, S: Into<Option<&'a str>>>(
110        &self,
111        glean: &Glean,
112        ping_name: S,
113    ) -> Option<String> {
114        let queried_ping_name = ping_name
115            .into()
116            .unwrap_or_else(|| &self.meta().inner.send_in_pings[0]);
117
118        match StorageManager.snapshot_metric_for_test(
119            glean.storage(),
120            queried_ping_name,
121            &self.meta.identifier(glean),
122            self.meta.inner.lifetime,
123        ) {
124            Some(Metric::Url(s)) => Some(s),
125            _ => None,
126        }
127    }
128
129    /// **Exported for test purposes.**
130    ///
131    /// Gets the number of recorded errors for the given metric and error type.
132    ///
133    /// # Arguments
134    ///
135    /// * `error` - The type of error
136    ///
137    /// # Returns
138    ///
139    /// The number of errors reported.
140    pub fn test_get_num_recorded_errors(&self, error: ErrorType) -> i32 {
141        crate::block_on_dispatcher();
142
143        crate::core::with_glean(|glean| {
144            test_get_num_recorded_errors(glean, self.meta(), error).unwrap_or(0)
145        })
146    }
147}
148
149impl TestGetValue for UrlMetric {
150    type Output = String;
151
152    /// **Test-only API (exported for FFI purposes).**
153    ///
154    /// Gets the currently stored value as a string.
155    ///
156    /// This doesn't clear the stored value.
157    ///
158    /// # Arguments
159    ///
160    /// * `ping_name` - the optional name of the ping to retrieve the metric
161    ///                 for. Defaults to the first value in `send_in_pings`.
162    ///
163    /// # Returns
164    ///
165    /// The stored value or `None` if nothing stored.
166    fn test_get_value(&self, ping_name: Option<String>) -> Option<String> {
167        crate::block_on_dispatcher();
168        crate::core::with_glean(|glean| self.get_value(glean, ping_name.as_deref()))
169    }
170}
171
#[cfg(test)]
mod test {
    use super::*;
    use crate::tests::new_glean;
    use crate::Lifetime;

    /// Builds the application-lifetime `test.url_metric` metric, sent in
    /// `store1`, that every test in this module records into.
    fn new_url_metric() -> UrlMetric {
        UrlMetric::new(CommonMetricData {
            name: "url_metric".into(),
            category: "test".into(),
            send_in_pings: vec!["store1".into()],
            lifetime: Lifetime::Application,
            disabled: false,
            dynamic_label: None,
        })
    }

    /// A valid URL is stored unchanged.
    #[test]
    fn payload_is_correct() {
        let (glean, _t) = new_glean(None);
        let metric = new_url_metric();

        let sample_url = "glean://test";
        metric.set_sync(&glean, sample_url);

        let stored = metric.get_value(&glean, "store1");
        assert_eq!(Some(sample_url), stored.as_deref());
    }

    /// An over-long URL is truncated to `MAX_URL_LENGTH` and one
    /// `InvalidOverflow` error is recorded.
    #[test]
    fn does_not_record_url_exceeding_maximum_length() {
        let (glean, _t) = new_glean(None);
        let metric = new_url_metric();

        // Whenever the URL is longer than our MAX_URL_LENGTH, we truncate the URL to the
        // MAX_URL_LENGTH.
        //
        // An 8-character path segment is used so that a whole number of repetitions,
        // together with the 8-character "glean://" prefix, lands exactly on
        // MAX_URL_LENGTH.
        let segment = "abcdefgh";

        // 2000 repetitions give a path of 16000 characters, well over MAX_URL_LENGTH.
        let oversized_url = format!("glean://{}", segment.repeat(2000));
        metric.set_sync(&glean, oversized_url);

        // "glean://" is 8 characters and 1023 segments are 8184 characters:
        // 8 + 8184 = 8192 (MAX_URL_LENGTH).
        let expected = format!("glean://{}", segment.repeat(1023));
        let stored = metric.get_value(&glean, "store1").unwrap();
        assert_eq!(stored, expected);

        let overflow_errors =
            test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidOverflow)
                .unwrap();
        assert_eq!(1, overflow_errors);
    }

    /// `data:` URLs are rejected with a single `InvalidValue` error.
    #[test]
    fn does_not_record_data_urls() {
        let (glean, _t) = new_glean(None);
        let metric = new_url_metric();

        metric.set_sync(&glean, "data:application/json");

        assert!(metric.get_value(&glean, "store1").is_none());

        let invalid_value_errors =
            test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue).unwrap();
        assert_eq!(1, invalid_value_errors);
    }

    /// Values with an invalid scheme are rejected with one `InvalidValue`
    /// error each; values with a valid scheme are stored as-is.
    #[test]
    fn url_validation_works_and_records_errors() {
        let (glean, _t) = new_glean(None);
        let metric = new_url_metric();

        let incorrects = [
            "",
            // Scheme may only start with upper or lowercase ASCII alpha[^1] character.
            // [1]: https://infra.spec.whatwg.org/#ascii-alpha
            "1glean://test",
            "-glean://test",
            // Scheme may only have ASCII alphanumeric characters or the `-`, `.`, `+` characters.
            "шеллы://test",
            "g!lean://test",
            "g=lean://test",
            // Scheme must be followed by `:` character.
            "glean//test",
        ];

        let corrects = [
            // The minimum URL
            "g:",
            // Empty body is fine
            "glean://",
            // "//" is actually not even necessary
            "glean:",
            "glean:test",
            "glean:test.com",
            // Scheme may only have ASCII alphanumeric characters or the `-`, `.`, `+` characters.
            "g-lean://test",
            "g+lean://test",
            "g.lean://test",
            // Query parameters are fine
            "glean://test?hello=world",
            // Finally, some actual real world URLs
            "https://infra.spec.whatwg.org/#ascii-alpha",
            "https://infra.spec.whatwg.org/#ascii-alpha?test=for-glean",
        ];

        // The rejected values go first, so the store must still be empty
        // after each one of them.
        for &incorrect in incorrects.iter() {
            metric.set_sync(&glean, incorrect);
            assert!(metric.get_value(&glean, "store1").is_none());
        }

        // Exactly one error per rejected value.
        let invalid_value_errors =
            test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue).unwrap();
        assert_eq!(incorrects.len(), invalid_value_errors as usize);

        for &correct in corrects.iter() {
            metric.set_sync(&glean, correct);
            assert_eq!(metric.get_value(&glean, "store1").unwrap(), correct);
        }
    }
}