glean_core/
util.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5use chrono::{DateTime, FixedOffset, Local};
6
7use crate::common_metric_data::CommonMetricDataInternal;
8use crate::error_recording::{record_error, ErrorType};
9use crate::metrics::TimeUnit;
10use crate::Glean;
11
/// Turns an application id into a pipeline-friendly identifier.
///
/// ASCII letters and digits are kept, with letters lowercased. Every other
/// character is replaced by a dash, and a run of consecutive replaced
/// characters collapses into a single dash.
pub fn sanitize_application_id(application_id: &str) -> String {
    let mut sanitized = String::with_capacity(application_id.len());
    // Tracks whether the most recently seen character was a non-alphanumeric
    // one, so that runs of them only ever emit one dash.
    let mut previous_was_dash = false;

    for ch in application_id.chars() {
        if ch.is_ascii_alphanumeric() {
            sanitized.push(ch.to_ascii_lowercase());
            previous_was_dash = false;
        } else if !previous_was_dash {
            sanitized.push('-');
            previous_was_dash = true;
        }
    }

    sanitized
}
31
32/// Generates an ISO8601 compliant date/time string for the given time,
33/// truncating it to the provided [`TimeUnit`].
34///
35/// # Arguments
36///
37/// * `datetime` - the [`DateTime`] object that holds the date, time and timezone information.
38/// * `truncate_to` - the desired resolution to use for the output string.
39///
40/// # Returns
41///
42/// A string representing the provided date/time truncated to the requested time unit.
43pub fn get_iso_time_string(datetime: DateTime<FixedOffset>, truncate_to: TimeUnit) -> String {
44    datetime.format(truncate_to.format_pattern()).to_string()
45}
46
47/// Get the current date & time with a fixed-offset timezone.
48///
49/// This converts from the `Local` timezone into its fixed-offset equivalent.
50/// If a timezone outside of [-24h, +24h] is detected it corrects the timezone offset to UTC (+0).
51pub(crate) fn local_now_with_offset() -> DateTime<FixedOffset> {
52    // See https://bugzilla.mozilla.org/show_bug.cgi?id=1611770.
53    //
54    // It's not clear if this bug on Windows still exist with the latest versions of
55    // the `time` crate. Removed the workaround.
56    let now: DateTime<Local> = Local::now();
57    now.with_timezone(now.offset())
58}
59
/// Truncates a string, ensuring that it doesn't end in the middle of a codepoint.
///
/// The string is shortened in place, so no second buffer is allocated when
/// truncation is required.
///
/// # Arguments
///
/// * `value` - The string to truncate.
/// * `length` - The length, in bytes, to truncate to.  The resulting string will
///   be at most this many bytes, but may be shorter to prevent ending in the middle
///   of a codepoint.
///
/// # Returns
///
/// A string, with at most `length` bytes. Strings that already fit are
/// returned unchanged.
pub(crate) fn truncate_string_at_boundary<S: Into<String>>(value: S, length: usize) -> String {
    let mut s = value.into();
    if s.len() > length {
        // Walk backwards from `length` to the closest char boundary. Index 0 is
        // always a boundary, so the search cannot fail; `unwrap_or(0)` merely
        // avoids a panic path.
        let cut = (0..=length)
            .rev()
            .find(|&idx| s.is_char_boundary(idx))
            .unwrap_or(0);
        // `cut` is a char boundary, which is what `String::truncate` requires.
        s.truncate(cut);
    }
    s
}
86
87/// Truncates a string, ensuring that it doesn't end in the middle of a codepoint.
88/// If the string required truncation, records an error through the error
89/// reporting mechanism.
90///
91/// # Arguments
92///
93/// * `glean` - The Glean instance the metric doing the truncation belongs to.
94/// * `meta` - The metadata for the metric. Used for recording the error.
95/// * `value` - The String to truncate.
96/// * `length` - The length, in bytes, to truncate to.  The resulting string will
97///   be at most this many bytes, but may be shorter to prevent ending in the middle
98///   of a codepoint.
99///
100/// # Returns
101///
102/// A string, with at most `length` bytes.
103pub(crate) fn truncate_string_at_boundary_with_error<S: Into<String>>(
104    glean: &Glean,
105    meta: &CommonMetricDataInternal,
106    value: S,
107    length: usize,
108) -> String {
109    let s = value.into();
110    if s.len() > length {
111        let msg = format!("Value length {} exceeds maximum of {}", s.len(), length);
112        record_error(glean, meta, ErrorType::InvalidOverflow, msg, None);
113        truncate_string_at_boundary(s, length)
114    } else {
115        s
116    }
117}
118
// On i686 on Windows, the CPython interpreter sets the FPU precision control
// flag to 53 bits of precision, rather than the 64 bit default. On x86_64 on
// Windows, the CPython interpreter changes the rounding control settings. This
// causes different floating point results than on other architectures. The
// guard type in this module makes it easy to set the correct precision and
// rounding control to match our other targets and platforms.
//
// See https://bugzilla.mozilla.org/show_bug.cgi?id=1623335 for additional context.
#[cfg(all(target_os = "windows", target_env = "gnu"))]
pub mod floating_point_context {
    use std::os::raw::{c_int, c_uint};

    #[link(name = "m")]
    extern "C" {
        // Gets and sets the floating point control word.
        //
        // C prototype:
        //   errno_t _controlfp_s(unsigned int *currentControl,
        //                        unsigned int newControl,
        //                        unsigned int mask);
        //
        // On return `*currentControl` holds the control word in effect *after*
        // the call. With `mask == 0` nothing is changed and the call is a read.
        //
        // See documentation here:
        // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/controlfp-s
        fn _controlfp_s(current: *mut c_uint, new: c_uint, mask: c_uint) -> c_int;
    }

    // Rounding control mask
    const MCW_RC: c_uint = 0x00000300;
    // Round by truncation
    const RC_CHOP: c_uint = 0x00000300;
    // Precision control mask
    const MCW_PC: c_uint = 0x00030000;
    // Values for 64-bit precision
    const PC_64: c_uint = 0x00000000;

    /// Guard that switches the floating point control word to 64-bit precision
    /// and rounding by truncation, and restores the previous precision and
    /// rounding settings when dropped.
    pub struct FloatingPointContext {
        /// Control word as it was before this guard changed it.
        original_value: c_uint,
    }

    impl FloatingPointContext {
        /// Saves the current control word, then applies the precision and
        /// rounding settings described on the type.
        ///
        /// Errors reported by `_controlfp_s` are ignored (best effort).
        pub fn new() -> Self {
            // Read the control word *before* modifying it. The value reported
            // by the modifying call below is the already-updated word, so it
            // cannot be used to restore the previous state.
            let mut original: c_uint = 0;
            // SAFETY: `original` is a valid, writable `unsigned int`, and a
            // zero mask makes this call a pure read of the control word.
            let _err = unsafe { _controlfp_s(&mut original, 0, 0) };

            let mut updated: c_uint = 0;
            // SAFETY: `updated` is a valid, writable `unsigned int`; only the
            // precision and rounding bits selected by the mask are changed.
            let _err = unsafe { _controlfp_s(&mut updated, PC_64 | RC_CHOP, MCW_PC | MCW_RC) };

            FloatingPointContext {
                original_value: original,
            }
        }
    }

    impl Drop for FloatingPointContext {
        /// Restores the precision and rounding bits saved by `new`.
        fn drop(&mut self) {
            let mut current: c_uint = 0;
            // SAFETY: `current` is a valid, writable `unsigned int`; the mask
            // limits the change to the bits this guard modified.
            let _err = unsafe { _controlfp_s(&mut current, self.original_value, MCW_PC | MCW_RC) };
        }
    }
}
175
// Stand-in used on every target other than windows-gnu: it exposes the same
// `FloatingPointContext` API but holds no state and changes nothing.
#[cfg(not(all(target_os = "windows", target_env = "gnu")))]
pub mod floating_point_context {
    /// Stateless placeholder for the floating point context guard.
    pub struct FloatingPointContext {}

    impl FloatingPointContext {
        /// Creates the placeholder; no floating point settings are touched.
        pub fn new() -> Self {
            Self {}
        }
    }
}
186
#[cfg(test)]
mod test {
    use super::*;
    use chrono::{offset::TimeZone, Timelike};

    #[test]
    fn test_sanitize_application_id() {
        // Every spelling below must normalize to the same application id.
        let inputs = [
            "org.mozilla.test-app",
            "org.mozilla..test---app",
            "org-mozilla-test-app",
            "org.mozilla.Test.App",
        ];

        for raw in inputs {
            assert_eq!("org-mozilla-test-app", sanitize_application_id(raw));
        }
    }

    #[test]
    fn test_get_iso_time_string() {
        // `1985-07-03T12:09:14.001560274+01:00`
        let dt = FixedOffset::east_opt(3600)
            .unwrap()
            .with_ymd_and_hms(1985, 7, 3, 12, 9, 14)
            .unwrap()
            .with_nanosecond(1_560_274)
            .unwrap();

        // Expected rendering for each resolution, from finest to coarsest.
        let cases = [
            (TimeUnit::Nanosecond, "1985-07-03T12:09:14.001560274+01:00"),
            (TimeUnit::Microsecond, "1985-07-03T12:09:14.001560+01:00"),
            (TimeUnit::Millisecond, "1985-07-03T12:09:14.001+01:00"),
            (TimeUnit::Second, "1985-07-03T12:09:14+01:00"),
            (TimeUnit::Minute, "1985-07-03T12:09+01:00"),
            (TimeUnit::Hour, "1985-07-03T12+01:00"),
            (TimeUnit::Day, "1985-07-03+01:00"),
        ];

        for (unit, expected) in cases {
            assert_eq!(expected, get_iso_time_string(dt, unit));
        }
    }

    #[test]
    fn local_now_gets_the_time() {
        // Order matters: the fixed-offset timestamp is taken second, so it
        // must not be earlier than the plain local one.
        let now = Local::now();
        let fixed_now = local_now_with_offset();

        // We can't compare across differing timezones, so we just compare the UTC timestamps.
        let fixed_is_not_earlier = fixed_now.naive_utc() >= now.naive_utc();
        assert!(
            fixed_is_not_earlier,
            "Time mismatch. Local now: {:?}, Fixed now: {:?}",
            now,
            fixed_now
        );
    }

    #[test]
    fn truncate_safely_test() {
        // (input, expected result when truncating to 10 bytes)
        let cases = [
            ("电脑坏了", "电脑坏"),
            ("0123456789abcdef", "0123456789"),
        ];

        for (input, expected) in cases {
            let truncated = truncate_string_at_boundary(input.to_string(), 10);
            assert_eq!(expected, truncated);
        }
    }

    #[test]
    #[should_panic]
    fn truncate_naive() {
        // Ensure that truncating the naïve way on this string would panic:
        // byte 10 falls in the middle of a codepoint.
        let value = String::from("电脑坏了");
        let _ = value[0..10].to_string();
    }
}