Skip to main content

glean_core/ping/
mod.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5//! Ping collection, assembly & submission.
6
7use std::fs::{self, create_dir_all, File};
8use std::io::{BufRead, BufReader, Write};
9use std::path::{Path, PathBuf};
10
11use log::info;
12use serde_json::{json, Value as JsonValue};
13
14use crate::common_metric_data::{CommonMetricData, Lifetime};
15use crate::metrics::{CounterMetric, DatetimeMetric, Metric, MetricType, PingType, TimeUnit};
16use crate::storage::{StorageManager, INTERNAL_STORAGE};
17use crate::upload::{HeaderMap, PingMetadata};
18use crate::util::{get_iso_time_string, local_now_with_offset};
19use crate::{Glean, Result, DELETION_REQUEST_PINGS_DIRECTORY, PENDING_PINGS_DIRECTORY};
20
/// Holds everything you need to store or send a ping.
///
/// All string fields borrow from the caller for the lifetime `'a`;
/// the payload, headers and the two string lists are owned.
pub struct Ping<'a> {
    /// The unique document id.
    pub doc_id: &'a str,
    /// The ping's name.
    pub name: &'a str,
    /// The path on the server to use when uploading this ping.
    pub url_path: &'a str,
    /// The payload, including `*_info` fields.
    pub content: JsonValue,
    /// The headers to upload with the payload.
    pub headers: HeaderMap,
    /// Whether the content contains {client|ping}_info sections.
    pub includes_info_sections: bool,
    /// Other pings that should be scheduled when this ping is sent.
    pub schedules_pings: Vec<String>,
    /// Capabilities the uploader must have in order to upload this ping.
    pub uploader_capabilities: Vec<String>,
}
40
/// Collect a ping's data, assemble it into its full payload and store it on disk.
///
/// This is a unit struct: it carries no state of its own, every method
/// receives the [`Glean`] instance or the data path it should operate on.
pub struct PingMaker;
43
44fn merge(a: &mut JsonValue, b: &JsonValue) {
45    match (a, b) {
46        (&mut JsonValue::Object(ref mut a), JsonValue::Object(b)) => {
47            for (k, v) in b {
48                merge(a.entry(k.clone()).or_insert(JsonValue::Null), v);
49            }
50        }
51        (a, b) => {
52            *a = b.clone();
53        }
54    }
55}
56
57impl Default for PingMaker {
58    fn default() -> Self {
59        Self::new()
60    }
61}
62
63impl PingMaker {
64    /// Creates a new [`PingMaker`].
65    pub fn new() -> Self {
66        Self
67    }
68
69    /// Gets, and then increments, the sequence number for a given ping.
70    fn get_ping_seq(&self, glean: &Glean, storage_name: &str) -> usize {
71        // Don't attempt to increase sequence number for disabled ping
72        if !glean.is_ping_enabled(storage_name) {
73            return 0;
74        }
75
76        // Sequence numbers are stored as a counter under a name that includes the storage name
77        let seq = CounterMetric::new(CommonMetricData {
78            name: format!("{}#sequence", storage_name),
79            // We don't need a category, the name is already unique
80            category: "".into(),
81            send_in_pings: vec![INTERNAL_STORAGE.into()],
82            lifetime: Lifetime::User,
83            ..Default::default()
84        });
85
86        let current_seq = match glean.storage().get_metric(seq.meta(), INTERNAL_STORAGE) {
87            Some(Metric::Counter(i)) => i,
88            _ => 0,
89        };
90
91        // Increase to next sequence id
92        seq.add_sync(glean, 1);
93
94        current_seq as usize
95    }
96
97    /// Gets the formatted start and end times for this ping and update for the next ping.
98    fn get_start_end_times(
99        &self,
100        glean: &Glean,
101        storage_name: &str,
102        time_unit: TimeUnit,
103    ) -> (String, String) {
104        let start_time = DatetimeMetric::new(
105            CommonMetricData {
106                name: format!("{}#start", storage_name),
107                category: "".into(),
108                send_in_pings: vec![INTERNAL_STORAGE.into()],
109                lifetime: Lifetime::User,
110                ..Default::default()
111            },
112            time_unit,
113        );
114
115        // "start_time" is the time the ping was generated the last time.
116        // If not available, we use the date the Glean object was initialized.
117        let start_time_data = start_time
118            .get_value(glean, INTERNAL_STORAGE)
119            .unwrap_or_else(|| glean.start_time());
120        let end_time_data = local_now_with_offset();
121
122        // Update the start time with the current time.
123        start_time.set_sync_chrono(glean, end_time_data);
124
125        // Format the times.
126        let start_time_data = get_iso_time_string(start_time_data, time_unit);
127        let end_time_data = get_iso_time_string(end_time_data, time_unit);
128        (start_time_data, end_time_data)
129    }
130
131    fn get_ping_info(
132        &self,
133        glean: &Glean,
134        storage_name: &str,
135        reason: Option<&str>,
136        precision: TimeUnit,
137    ) -> JsonValue {
138        let (start_time, end_time) = self.get_start_end_times(glean, storage_name, precision);
139        let mut map = json!({
140            "seq": self.get_ping_seq(glean, storage_name),
141            "start_time": start_time,
142            "end_time": end_time,
143        });
144
145        if let Some(reason) = reason {
146            map.as_object_mut()
147                .unwrap() // safe unwrap, we created the object above
148                .insert("reason".to_string(), JsonValue::String(reason.to_string()));
149        };
150
151        // Get the experiment data, if available.
152        if let Some(experiment_data) =
153            StorageManager.snapshot_experiments_as_json(glean.storage(), INTERNAL_STORAGE)
154        {
155            map.as_object_mut()
156                .unwrap() // safe unwrap, we created the object above
157                .insert("experiments".to_string(), experiment_data);
158        };
159
160        // Get the Server Knobs configuration, if available.
161        if let Some(config_json) = glean
162            .additional_metrics
163            .server_knobs_config
164            .get_value(glean, INTERNAL_STORAGE)
165        {
166            // Object metrics always hold a string produced by serde_json::to_string,
167            // so deserializing it back into a JsonValue cannot fail.
168            let server_knobs_config = serde_json::from_str(&config_json).unwrap();
169            map.as_object_mut()
170                .unwrap() // safe unwrap, we created the object above
171                .insert("server_knobs_config".to_string(), server_knobs_config);
172        }
173
174        map
175    }
176
177    fn get_client_info(&self, glean: &Glean, include_client_id: bool) -> JsonValue {
178        // Add the "telemetry_sdk_build", which is the glean-core version.
179        let mut map = json!({
180            "telemetry_sdk_build": crate::GLEAN_VERSION,
181        });
182
183        // Flatten the whole thing.
184        if let Some(client_info) =
185            StorageManager.snapshot_as_json(glean.storage(), "glean_client_info", true)
186        {
187            let client_info_obj = client_info.as_object().unwrap(); // safe unwrap, snapshot always returns an object.
188            for (_metric_type, metrics) in client_info_obj {
189                merge(&mut map, metrics);
190            }
191            let map = map.as_object_mut().unwrap(); // safe unwrap, we created the object above.
192            let mut attribution = serde_json::Map::new();
193            let mut distribution = serde_json::Map::new();
194            map.retain(|name, value| {
195                // Only works because we ensure no client_info metric categories contain '.'.
196                let mut split = name.split('.');
197                let category = split.next();
198                let name = split.next();
199                if let (Some(category), Some(name)) = (category, name) {
200                    if category == "attribution" {
201                        attribution.insert(name.into(), value.take());
202                        false
203                    } else if category == "distribution" {
204                        distribution.insert(name.into(), value.take());
205                        false
206                    } else {
207                        true
208                    }
209                } else {
210                    true
211                }
212            });
213            if !attribution.is_empty() {
214                map.insert("attribution".into(), serde_json::Value::from(attribution));
215            }
216            if !distribution.is_empty() {
217                map.insert("distribution".into(), serde_json::Value::from(distribution));
218            }
219        } else {
220            log::warn!("Empty client info data.");
221        }
222
223        if !include_client_id {
224            // safe unwrap, we created the object above
225            map.as_object_mut().unwrap().remove("client_id");
226        }
227
228        json!(map)
229    }
230
231    /// Build the headers to be persisted and sent with a ping.
232    ///
233    /// Currently the only headers we persist are `X-Debug-ID` and `X-Source-Tags`.
234    ///
235    /// # Arguments
236    ///
237    /// * `glean` - the [`Glean`] instance to collect headers from.
238    ///
239    /// # Returns
240    ///
241    /// A map of header names to header values.
242    /// Might be empty if there are no extra headers to send.
243    fn get_headers(&self, glean: &Glean) -> HeaderMap {
244        let mut headers_map = HeaderMap::new();
245
246        if let Some(debug_view_tag) = glean.debug_view_tag() {
247            headers_map.insert("X-Debug-ID".to_string(), debug_view_tag.to_string());
248        }
249
250        if let Some(source_tags) = glean.source_tags() {
251            headers_map.insert("X-Source-Tags".to_string(), source_tags.join(","));
252        }
253
254        headers_map
255    }
256
    /// Collects a snapshot for the given ping from storage and attaches the required meta information.
    ///
    /// # Arguments
    ///
    /// * `glean` - the [`Glean`] instance to collect data from.
    /// * `ping` - the ping to collect for.
    /// * `reason` - an optional reason code to include in the ping.
    /// * `doc_id` - the ping's unique document identifier.
    /// * `url_path` - the path on the server to upload this ping to.
    ///
    /// # Returns
    ///
    /// A fully assembled representation of the ping payload and associated metadata.
    ///
    /// `None` is returned when
    /// * there is neither metrics nor events data and the ping is not `send_if_empty`, or
    /// * the ping is named `events` and there is no events data.
    pub fn collect<'a>(
        &self,
        glean: &Glean,
        ping: &'a PingType,
        reason: Option<&str>,
        doc_id: &'a str,
        url_path: &'a str,
    ) -> Option<Ping<'a>> {
        info!("Collecting {}", ping.name());
        let database = glean.storage();

        let mut metrics_data = StorageManager.snapshot_as_json(database, ping.name(), true);

        let events_data = glean
            .event_storage()
            .snapshot_as_json(glean, ping.name(), true);

        // We're adding the metric `glean.ping.uploader_capabilities` the most manual way here.
        // This avoids creating a `StringListMetric` and further indirection.
        // It also avoids yet another database write.
        // It's only added if
        // (1) There's already data in `metrics` or `events`
        // (2) or the ping should be sent empty (`send_if_empty=true`)
        let uploader_capabilities = ping.uploader_capabilities();
        if !uploader_capabilities.is_empty() {
            // No metrics yet, but the ping will go out anyway:
            // start from an empty object so the capabilities have a place to live.
            if metrics_data.is_none() && (ping.send_if_empty() || events_data.is_some()) {
                metrics_data = Some(json!({}))
            }

            if let Some(map) = metrics_data.as_mut().and_then(|o| o.as_object_mut()) {
                // NOTE(review): this unwrap assumes an existing `string_list` entry
                // is always a JSON object - confirm against the snapshot format.
                let lists = map
                    .entry("string_list")
                    .or_insert_with(|| json!({}))
                    .as_object_mut()
                    .unwrap();

                lists.insert(
                    "glean.ping.uploader_capabilities".to_string(),
                    json!(uploader_capabilities),
                );
            }
        }

        // Due to the way the experimentation identifier could link datasets that are intentionally unlinked,
        // it will not be included in pings that specifically exclude the Glean client-id, those pings that
        // should not be sent if empty, or pings that exclude the {client|ping}_info sections wholesale.
        if (!ping.include_client_id() || !ping.send_if_empty() || !ping.include_info_sections())
            && glean.test_get_experimentation_id().is_some()
            && metrics_data.is_some()
        {
            // There is a lot of unwrapping here, but that's fine because the `if` conditions above mean that the
            // experimentation id is present in the metrics.
            let metrics = metrics_data.as_mut().unwrap().as_object_mut().unwrap();
            let metrics_count = metrics.len();
            let strings = metrics.get_mut("string").unwrap().as_object_mut().unwrap();
            let string_count = strings.len();

            // Handle the send_if_empty case by checking if the experimentation id is the only metric in the data.
            let empty_payload = events_data.is_none() && metrics_count == 1 && string_count == 1;
            if !ping.include_client_id() || (!ping.send_if_empty() && empty_payload) {
                strings.remove("glean.client.annotation.experimentation_id");
            }

            // Drop containers that became empty through the removal above,
            // so the emptiness check below sees the ping as empty.
            if strings.is_empty() {
                metrics.remove("string");
            }

            if metrics.is_empty() {
                metrics_data = None;
            }
        }

        let is_empty = metrics_data.is_none() && events_data.is_none();
        if !ping.send_if_empty() && is_empty {
            info!("Storage for {} empty. Bailing out.", ping.name());
            return None;
        } else if ping.name() == "events" && events_data.is_none() {
            // The `events` ping is never sent without events, regardless of `send_if_empty`.
            info!("No events for 'events' ping. Bailing out.");
            return None;
        } else if is_empty {
            info!(
                "Storage for {} empty. Ping will still be sent.",
                ping.name()
            );
        }

        let precision = if ping.precise_timestamps() {
            TimeUnit::Millisecond
        } else {
            TimeUnit::Minute
        };

        // Building the info sections happens only after the bail-outs above:
        // `get_ping_info` advances the sequence number and the start time.
        let mut json = if ping.include_info_sections() {
            let ping_info = self.get_ping_info(glean, ping.name(), reason, precision);
            let client_info = self.get_client_info(glean, ping.include_client_id());

            json!({
                "ping_info": ping_info,
                "client_info": client_info
            })
        } else {
            json!({})
        };

        // Both branches above produce an object, so this `?` is not expected to bail.
        let json_obj = json.as_object_mut()?;
        if let Some(metrics_data) = metrics_data {
            json_obj.insert("metrics".to_string(), metrics_data);
        }
        if let Some(events_data) = events_data {
            json_obj.insert("events".to_string(), events_data);
        }

        Some(Ping {
            content: json,
            name: ping.name(),
            doc_id,
            url_path,
            headers: self.get_headers(glean),
            includes_info_sections: ping.include_info_sections(),
            schedules_pings: ping.schedules_pings().to_vec(),
            uploader_capabilities: ping.uploader_capabilities().to_vec(),
        })
    }
394
395    /// Gets the path to a directory for ping storage.
396    ///
397    /// The directory will be created inside the `data_path`.
398    /// The `pings` directory (and its parents) is created if it does not exist.
399    fn get_pings_dir(&self, data_path: &Path, ping_type: Option<&str>) -> std::io::Result<PathBuf> {
400        // Use a special directory for deletion-request pings
401        let pings_dir = match ping_type {
402            Some("deletion-request") => data_path.join(DELETION_REQUEST_PINGS_DIRECTORY),
403            _ => data_path.join(PENDING_PINGS_DIRECTORY),
404        };
405
406        create_dir_all(&pings_dir)?;
407        Ok(pings_dir)
408    }
409
410    /// Gets path to a directory for temporary storage.
411    ///
412    /// The directory will be created inside the `data_path`.
413    /// The `tmp` directory (and its parents) is created if it does not exist.
414    fn get_tmp_dir(&self, data_path: &Path) -> std::io::Result<PathBuf> {
415        let pings_dir = data_path.join("tmp");
416        create_dir_all(&pings_dir)?;
417        Ok(pings_dir)
418    }
419
420    /// Stores a ping to disk in the pings directory.
421    pub fn store_ping(&self, data_path: &Path, ping: &Ping) -> std::io::Result<()> {
422        let pings_dir = self.get_pings_dir(data_path, Some(ping.name))?;
423        let temp_dir = self.get_tmp_dir(data_path)?;
424
425        // Write to a temporary location and then move when done,
426        // for transactional writes.
427        let temp_ping_path = temp_dir.join(ping.doc_id);
428        let ping_path = pings_dir.join(ping.doc_id);
429
430        log::debug!(
431            "Storing ping '{}' at '{}'",
432            ping.doc_id,
433            ping_path.display()
434        );
435
436        {
437            let mut file = File::create(&temp_ping_path)?;
438            file.write_all(ping.url_path.as_bytes())?;
439            file.write_all(b"\n")?;
440            file.write_all(::serde_json::to_string(&ping.content)?.as_bytes())?;
441            file.write_all(b"\n")?;
442            let metadata = PingMetadata {
443                // We don't actually need to clone the headers except to match PingMetadata's ownership.
444                // But since we're going to write a file to disk in a sec,
445                // and HeaderMaps tend to have only like two things in them, tops,
446                // the cost is bearable.
447                headers: Some(ping.headers.clone()),
448                body_has_info_sections: Some(ping.includes_info_sections),
449                ping_name: Some(ping.name.to_string()),
450                uploader_capabilities: Some(ping.uploader_capabilities.clone()),
451            };
452            file.write_all(::serde_json::to_string(&metadata)?.as_bytes())?;
453        }
454
455        if let Err(e) = std::fs::rename(&temp_ping_path, &ping_path) {
456            log::warn!(
457                "Unable to move '{}' to '{}",
458                temp_ping_path.display(),
459                ping_path.display()
460            );
461            return Err(e);
462        }
463
464        Ok(())
465    }
466
467    /// Clears any pending pings in the queue.
468    pub fn clear_pending_pings(&self, data_path: &Path, ping_names: &[&str]) -> Result<()> {
469        let pings_dir = self.get_pings_dir(data_path, None)?;
470
471        // TODO(bug 1932909): Refactor this into its own function
472        // and share it with `upload::directory`.
473        let entries = pings_dir.read_dir()?;
474        for entry in entries.filter_map(|entry| entry.ok()) {
475            if let Ok(file_type) = entry.file_type() {
476                if !file_type.is_file() {
477                    continue;
478                }
479            } else {
480                continue;
481            }
482
483            let file = match File::open(entry.path()) {
484                Ok(file) => file,
485                Err(_) => {
486                    continue;
487                }
488            };
489
490            let mut lines = BufReader::new(file).lines();
491            if let (Some(Ok(path)), Some(Ok(_body)), Ok(metadata)) =
492                (lines.next(), lines.next(), lines.next().transpose())
493            {
494                let PingMetadata { ping_name, .. } = metadata
495                    .and_then(|m| crate::upload::process_metadata(&path, &m))
496                    .unwrap_or_default();
497                let ping_name =
498                    ping_name.unwrap_or_else(|| path.split('/').nth(3).unwrap_or("").into());
499
500                if ping_names.contains(&&ping_name[..]) {
501                    _ = fs::remove_file(entry.path());
502                }
503            } else {
504                continue;
505            }
506        }
507
508        log::debug!("All pending pings deleted");
509
510        Ok(())
511    }
512}
513
#[cfg(test)]
mod test {
    use std::collections::HashMap;

    use super::*;
    use crate::metrics::RemoteSettingsConfig;
    use crate::tests::new_glean;

    /// Builds a Server Knobs config that enables `test.counter` and leaves
    /// every other field at its default.
    fn config_enabling_test_counter() -> RemoteSettingsConfig {
        let mut metrics_enabled = HashMap::new();
        metrics_enabled.insert("test.counter".to_string(), true);

        RemoteSettingsConfig {
            metrics_enabled,
            ..Default::default()
        }
    }

    /// Collects the `ping_info` section for the given store with minute precision.
    fn ping_info_for(glean: &Glean, store: &str) -> JsonValue {
        PingMaker::new().get_ping_info(glean, store, None, TimeUnit::Minute)
    }

    #[test]
    fn sequence_numbers_should_be_reset_when_toggling_uploading() {
        let (mut glean, _t) = new_glean(None);
        let maker = PingMaker::new();

        // Fresh instance: counting starts at zero.
        assert_eq!(0, maker.get_ping_seq(&glean, "store1"));
        assert_eq!(1, maker.get_ping_seq(&glean, "store1"));

        // While upload is disabled the sequence number stays at zero.
        glean.set_upload_enabled(false);
        assert_eq!(0, maker.get_ping_seq(&glean, "store1"));
        assert_eq!(0, maker.get_ping_seq(&glean, "store1"));

        // Re-enabling upload starts counting from zero again.
        glean.set_upload_enabled(true);
        assert_eq!(0, maker.get_ping_seq(&glean, "store1"));
        assert_eq!(1, maker.get_ping_seq(&glean, "store1"));
    }

    #[test]
    fn test_server_knobs_config_appears_in_ping_info() {
        let (glean, _t) = new_glean(None);

        // Apply complete Server Knobs config with all three fields
        let mut metrics_enabled = HashMap::new();
        metrics_enabled.insert("test.counter".to_string(), true);

        let mut pings_enabled = HashMap::new();
        pings_enabled.insert("custom".to_string(), false);

        glean.apply_server_knobs_config(RemoteSettingsConfig {
            metrics_enabled,
            pings_enabled,
            event_threshold: Some(41),
            session_sample_rate: None,
        });

        // Verify complete config structure appears in ping_info
        let ping_info = ping_info_for(&glean, "store1");
        let server_knobs = &ping_info["server_knobs_config"];

        assert_eq!(server_knobs["metrics_enabled"]["test.counter"], true);
        assert_eq!(server_knobs["pings_enabled"]["custom"], false);
        assert_eq!(server_knobs["event_threshold"], 41);
    }

    #[test]
    fn test_server_knobs_not_included_when_no_config() {
        let (glean, _t) = new_glean(None);

        let ping_info = ping_info_for(&glean, "store1");

        assert!(ping_info.get("server_knobs_config").is_none());
    }

    #[test]
    fn test_server_knobs_appears_in_all_pings() {
        let (glean, _t) = new_glean(None);
        glean.apply_server_knobs_config(config_enabling_test_counter());

        // Verify config appears in multiple different pings
        let ping_info1 = ping_info_for(&glean, "store1");
        let ping_info2 = ping_info_for(&glean, "store2");

        assert_eq!(
            ping_info1["server_knobs_config"]["metrics_enabled"]["test.counter"],
            true
        );
        assert_eq!(
            ping_info2["server_knobs_config"]["metrics_enabled"]["test.counter"],
            true
        );
    }

    #[test]
    fn test_server_knobs_config_omits_empty_fields() {
        let (glean, _t) = new_glean(None);

        // Apply config with only metrics_enabled set; pings_enabled and event_threshold are empty/None
        glean.apply_server_knobs_config(config_enabling_test_counter());

        let ping_info = ping_info_for(&glean, "store1");
        let server_knobs = &ping_info["server_knobs_config"];

        // metrics_enabled should be present
        assert_eq!(server_knobs["metrics_enabled"]["test.counter"], true);
        // pings_enabled and event_threshold should be absent (not empty object / null)
        assert!(server_knobs.get("pings_enabled").is_none());
        assert!(server_knobs.get("event_threshold").is_none());
    }
}