// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
#![deny(missing_docs)]
//! Glean is a modern approach for recording and sending Telemetry data.
//!
//! It's in use at Mozilla.
//!
//! All documentation can be found online:
//!
//! ## [The Glean SDK Book](https://mozilla.github.io/glean)
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use chrono::{DateTime, FixedOffset};
use lazy_static::lazy_static;
use uuid::Uuid;
mod macros;
pub mod ac_migration;
mod common_metric_data;
mod database;
mod error;
mod error_recording;
mod event_database;
mod histogram;
mod internal_metrics;
pub mod metrics;
pub mod ping;
pub mod storage;
mod util;
use crate::ac_migration::migrate_sequence_numbers;
pub use crate::common_metric_data::{CommonMetricData, Lifetime};
use crate::database::Database;
pub use crate::error::{Error, Result};
pub use crate::error_recording::{test_get_num_recorded_errors, ErrorType};
use crate::event_database::EventDatabase;
use crate::internal_metrics::CoreMetrics;
use crate::metrics::PingType;
use crate::ping::PingMaker;
use crate::storage::StorageManager;
use crate::util::{local_now_with_offset, sanitize_application_id};
// Version of the ping payload schema; embedded in every submit URL built by `make_path`.
const GLEAN_SCHEMA_VERSION: u32 = 1;
// Default for `Configuration::max_events` when the caller passes `None`.
const DEFAULT_MAX_EVENTS: usize = 500;
lazy_static! {
    // A "well-known" dummy client id written into the `client_id` metric while
    // upload is disabled (see `clear_metrics`), making pings unintentionally
    // sent in that state easy to detect.
    static ref KNOWN_CLIENT_ID: Uuid =
        Uuid::parse_str("c0ffeec0-ffee-c0ff-eec0-ffeec0ffeec0").unwrap();
}
/// The Glean configuration.
///
/// Optional values will be filled in with default values.
#[derive(Debug, Clone)]
pub struct Configuration {
    /// Whether upload should be enabled.
    pub upload_enabled: bool,
    /// Path to a directory to store all data in.
    pub data_path: String,
    /// The application ID (will be sanitized during initialization).
    pub application_id: String,
    /// The maximum number of events to store before sending a ping containing events.
    /// `None` selects the built-in default (`DEFAULT_MAX_EVENTS`, 500).
    pub max_events: Option<usize>,
}
/// The object holding meta information about a Glean instance.
///
/// ## Example
///
/// Create a new Glean instance, register a ping, record a simple counter and then send the final
/// ping.
///
/// ```rust,no_run
/// # use glean_core::{Glean, Configuration, CommonMetricData, metrics::*};
/// let cfg = Configuration {
/// data_path: "/tmp/glean".into(),
/// application_id: "glean.sample.app".into(),
/// upload_enabled: true,
/// max_events: None,
/// };
/// let mut glean = Glean::new(cfg).unwrap();
/// let ping = PingType::new("sample", true);
/// glean.register_ping_type(&ping);
///
/// let call_counter: CounterMetric = CounterMetric::new(CommonMetricData {
/// name: "calls".into(),
/// category: "local".into(),
/// send_in_pings: vec!["sample".into()],
/// ..Default::default()
/// });
///
/// call_counter.add(&glean, 1);
///
/// glean.send_ping(&ping).unwrap();
/// ```
///
/// ## Note
///
/// In specific language bindings, this is usually wrapped in a singleton and all metric recording goes to a single instance of this object.
/// In the Rust core, it is possible to create multiple instances, which is used in testing.
#[derive(Debug)]
pub struct Glean {
    // Whether metric recording and ping upload are currently enabled.
    upload_enabled: bool,
    // Persistent storage for all non-event metrics.
    data_store: Database,
    // Separate persistent storage for recorded events.
    event_data_store: EventDatabase,
    // Metrics Glean records about itself (client_id, first_run_date).
    core_metrics: CoreMetrics,
    // Root directory all databases and queued pings live under.
    data_path: PathBuf,
    // Sanitized application id, used when building submit URL paths.
    application_id: String,
    // Pings registered via `register_ping_type`, keyed by ping name.
    ping_registry: HashMap<String, PingType>,
    // Time this instance was created; exposed internally via `start_time()`.
    start_time: DateTime<FixedOffset>,
    // Maximum number of events to store before sending a ping containing events.
    max_events: usize,
}
impl Glean {
/// Create and initialize a new Glean object.
///
/// This will create the necessary directories and files in `data_path`.
/// This will also initialize the core metrics.
pub fn new(cfg: Configuration) -> Result<Self> {
log::info!("Creating new Glean");
let application_id = sanitize_application_id(&cfg.application_id);
// Creating the data store creates the necessary path as well.
// If that fails we bail out and don't initialize further.
let data_store = Database::new(&cfg.data_path)?;
let event_data_store = EventDatabase::new(&cfg.data_path)?;
let mut glean = Self {
upload_enabled: cfg.upload_enabled,
data_store,
event_data_store,
core_metrics: CoreMetrics::new(),
data_path: PathBuf::from(cfg.data_path),
application_id,
ping_registry: HashMap::new(),
start_time: local_now_with_offset(),
max_events: cfg.max_events.unwrap_or(DEFAULT_MAX_EVENTS),
};
glean.on_change_upload_enabled(cfg.upload_enabled);
Ok(glean)
}
/// Create and initialize a new Glean object.
///
/// This will attempt to delete any previously existing database and
/// then create the necessary directories and files in `data_path`.
/// This will also initialize the core metrics.
///
/// # Arguments
///
/// * `cfg` - an instance of the Glean `Configuration`.
/// * `new_sequence_nums` - a map of ("<pingName>_seq", sequence number)
/// used to initialize Glean with sequence numbers imported from glean-ac.
pub fn with_sequence_numbers(
cfg: Configuration,
new_sequence_nums: HashMap<String, i32>,
) -> Result<Self> {
log::info!("Creating new Glean (migrating data)");
// Delete the database directory, if it exists. Bail out if there's
// errors, as I'm not sure what else could be done if we can't even
// delete a directory we own.
let db_path = Path::new(&cfg.data_path).join("db");
if db_path.exists() {
std::fs::remove_dir_all(db_path)?;
}
let glean = Self::new(cfg)?;
// Set sequence numbers coming through the FFI.
migrate_sequence_numbers(&glean, new_sequence_nums);
Ok(glean)
}
/// For tests make it easy to create a Glean object using only the required configuration.
#[cfg(test)]
pub(crate) fn with_options(
data_path: &str,
application_id: &str,
upload_enabled: bool,
) -> Result<Self> {
let cfg = Configuration {
data_path: data_path.into(),
application_id: application_id.into(),
upload_enabled,
max_events: None,
};
Self::new(cfg)
}
/// Initialize the core metrics managed by Glean's Rust core.
fn initialize_core_metrics(&mut self) {
let need_new_client_id = match self
.core_metrics
.client_id
.get_value(self, "glean_client_info")
{
None => true,
Some(uuid) => uuid == *KNOWN_CLIENT_ID,
};
if need_new_client_id {
self.core_metrics.client_id.generate_and_set(self);
}
if self
.core_metrics
.first_run_date
.get_value(self, "glean_client_info")
.is_none()
{
self.core_metrics.first_run_date.set(self, None);
}
}
/// Called when Glean is initialized to the point where it can correctly
/// assemble pings. Usually called from the language specific layer after all
/// of the core metrics have been set and the ping types have been
/// registered.
///
/// # Return value
///
/// `true` if at least one ping was generated, `false` otherwise.
pub fn on_ready_to_send_pings(&self) -> bool {
self.event_data_store.flush_pending_events_on_startup(&self)
}
/// Set whether upload is enabled or not.
///
/// When uploading is disabled, metrics aren't recorded at all and no
/// data is uploaded.
///
/// When disabling, all pending metrics, events and queued pings are cleared.
///
/// When enabling, the core Glean metrics are recreated.
///
/// If the value of this flag is not actually changed, this is a no-op.
///
/// # Arguments
///
/// * `flag` - When true, enable metric collection.
///
/// # Returns
///
/// * Returns true when the flag was different from the current value, and
/// actual work was done to clear or reinstate metrics.
pub fn set_upload_enabled(&mut self, flag: bool) -> bool {
log::info!("Upload enabled: {:?}", flag);
if self.upload_enabled != flag {
self.upload_enabled = flag;
self.on_change_upload_enabled(flag);
true
} else {
false
}
}
    /// Determine whether upload is enabled.
    ///
    /// When upload is disabled, no data will be recorded.
    ///
    /// The flag is toggled via `set_upload_enabled`.
    pub fn is_upload_enabled(&self) -> bool {
        self.upload_enabled
    }
/// Handles the changing of state when upload_enabled changes.
///
/// Should only be called when the state actually changes.
/// When disabling, all pending metrics, events and queued pings are cleared.
///
/// When enabling, the core Glean metrics are recreated.
///
/// # Arguments
///
/// * `flag` - When true, enable metric collection.
fn on_change_upload_enabled(&mut self, flag: bool) {
if flag {
self.initialize_core_metrics();
} else {
self.clear_metrics();
}
}
    /// Clear any pending metrics when telemetry is disabled.
    ///
    /// Wipes the metric and event databases and any pings queued on disk,
    /// then re-writes the two values that survive a disable: the dummy
    /// `KNOWN_CLIENT_ID` and the previously stored `first_run_date`.
    ///
    /// NOTE(review): statement order here is load-bearing — `upload_enabled`
    /// is temporarily flipped to `true` below so the final `set` calls are
    /// not no-ops.
    fn clear_metrics(&mut self) {
        // There is only one metric that we want to survive after clearing all
        // metrics: first_run_date. Here, we store its value so we can restore
        // it after clearing the metrics.
        let existing_first_run_date = self
            .core_metrics
            .first_run_date
            .get_value(self, "glean_client_info");

        // Clear any pending pings.
        let ping_maker = PingMaker::new();
        if let Err(err) = ping_maker.clear_pending_pings(self.get_data_path()) {
            log::error!("Error clearing pending pings: {}", err);
        }

        // Delete all stored metrics.
        // Note that this also includes the ping sequence numbers, so it has
        // the effect of resetting those to their initial values.
        self.data_store.clear_all();
        if let Err(err) = self.event_data_store.clear_all() {
            log::error!("Error clearing pending events: {}", err);
        }

        // This does not clear the experiments store (which isn't managed by the
        // StorageEngineManager), since doing so would mean we would have to have the
        // application tell us again which experiments are active if telemetry is
        // re-enabled.

        {
            // We need to briefly set upload_enabled to true here so that `set`
            // is not a no-op. This is safe, since nothing on the Rust side can
            // run concurrently to this since we hold a mutable reference to the
            // Glean object. Additionally, the pending pings have been cleared
            // from disk, so the PingUploader can't wake up and start sending
            // pings.
            self.upload_enabled = true;

            // Store a "dummy" KNOWN_CLIENT_ID in the client_id metric. This will
            // make it easier to detect if pings were unintentionally sent after
            // uploading is disabled.
            self.core_metrics.client_id.set(self, *KNOWN_CLIENT_ID);

            // Restore the first_run_date.
            if let Some(existing_first_run_date) = existing_first_run_date {
                self.core_metrics
                    .first_run_date
                    .set(self, Some(existing_first_run_date));
            }

            self.upload_enabled = false;
        }
    }
    /// Get the application ID as specified on instantiation.
    ///
    /// This is the sanitized form produced by `sanitize_application_id`
    /// in `Glean::new`.
    pub fn get_application_id(&self) -> &str {
        &self.application_id
    }

    /// Get the data path of this instance.
    ///
    /// All databases and queued pings live under this directory.
    pub fn get_data_path(&self) -> &Path {
        &self.data_path
    }

    /// Get a handle to the (non-event) metrics database.
    pub fn storage(&self) -> &Database {
        &self.data_store
    }

    /// Get a handle to the event database.
    pub fn event_storage(&self) -> &EventDatabase {
        &self.event_data_store
    }

    /// Get the maximum number of events to store before sending a ping.
    ///
    /// Either `Configuration::max_events` or the built-in default.
    pub fn get_max_events(&self) -> usize {
        self.max_events
    }
/// Take a snapshot for the given store and optionally clear it.
///
/// ## Arguments
///
/// * `store_name` - The store to snapshot.
/// * `clear_store` - Whether to clear the store after snapshotting.
///
/// ## Return value
///
/// Returns the snapshot in a string encoded as JSON.
/// If the snapshot is empty, it returns an empty string.
pub fn snapshot(&mut self, store_name: &str, clear_store: bool) -> String {
StorageManager
.snapshot(&self.storage(), store_name, clear_store)
.unwrap_or_else(|| String::from(""))
}
fn make_path(&self, ping_name: &str, doc_id: &str) -> String {
format!(
"/submit/{}/{}/{}/{}",
self.get_application_id(),
ping_name,
GLEAN_SCHEMA_VERSION,
doc_id
)
}
/// Send a ping.
///
/// The ping content is assembled as soon as possible, but upload is not
/// guaranteed to happen immediately, as that depends on the upload
/// policies.
///
/// If the ping currently contains no content, it will not be sent.
///
/// Returns true if a ping was assembled and queued, false otherwise.
/// Returns an error if collecting or writing the ping to disk failed.
pub fn send_ping(&self, ping: &PingType) -> Result<bool> {
let ping_maker = PingMaker::new();
let doc_id = Uuid::new_v4().to_string();
let url_path = self.make_path(&ping.name, &doc_id);
match ping_maker.collect(self, &ping) {
None => {
log::info!(
"No content for ping '{}', therefore no ping queued.",
ping.name
);
Ok(false)
}
Some(content) => {
if let Err(e) =
ping_maker.store_ping(&doc_id, &self.get_data_path(), &url_path, &content)
{
log::warn!("IO error while writing ping to file: {}", e);
return Err(e.into());
}
log::info!(
"The ping '{}' was submitted and will be sent as soon as possible",
ping.name
);
Ok(true)
}
}
}
/// Send a list of pings by name.
///
/// See `send_ping` for detailed information.
///
/// Returns true if at least one ping was assembled and queued, false otherwise.
pub fn send_pings_by_name(&self, ping_names: &[String]) -> bool {
// TODO: 1553813: glean-ac collects and stores pings in parallel and then joins them all before queueing the worker.
// This here is writing them out sequentially.
let mut result = false;
for ping_name in ping_names {
if let Ok(true) = self.send_ping_by_name(ping_name) {
result = true;
}
}
result
}
/// Send a ping by name.
///
/// The ping content is assembled as soon as possible, but upload is not
/// guaranteed to happen immediately, as that depends on the upload
/// policies.
///
/// If the ping currently contains no content, it will not be sent.
///
/// Returns true if a ping was assembled and queued, false otherwise.
/// Returns an error if collecting or writing the ping to disk failed.
pub fn send_ping_by_name(&self, ping_name: &str) -> Result<bool> {
match self.get_ping_by_name(ping_name) {
None => {
log::error!("Attempted to send unknown ping '{}'", ping_name);
Ok(false)
}
Some(ping) => self.send_ping(ping),
}
}
    /// Get a [`PingType`](metrics/struct.PingType.html) by name.
    ///
    /// ## Return value
    ///
    /// Returns the `PingType` if a ping of the given name was registered
    /// before via `register_ping_type`.
    /// Returns `None` otherwise.
    pub fn get_ping_by_name(&self, ping_name: &str) -> Option<&PingType> {
        self.ping_registry.get(ping_name)
    }
    /// Register a new [`PingType`](metrics/struct.PingType.html).
    ///
    /// If a ping of the same name was already registered, an error is logged
    /// but the new ping still replaces the previous registration.
    pub fn register_ping_type(&mut self, ping: &PingType) {
        if self.ping_registry.contains_key(&ping.name) {
            log::error!("Duplicate ping named '{}'", ping.name)
        }
        // Insert happens unconditionally: last registration wins.
        self.ping_registry.insert(ping.name.clone(), ping.clone());
    }
    /// Get the time this Glean object was created.
    pub(crate) fn start_time(&self) -> DateTime<FixedOffset> {
        self.start_time
    }
/// Indicate that an experiment is running.
/// Glean will then add an experiment annotation to the environment
/// which is sent with pings. This information is not persisted between runs.
///
/// ## Arguments
///
/// * `experiment_id` - The id of the active experiment (maximum 30 bytes).
/// * `branch` - The experiment branch (maximum 30 bytes).
/// * `extra` - Optional metadata to output with the ping.
pub fn set_experiment_active(
&self,
experiment_id: String,
branch: String,
extra: Option<HashMap<String, String>>,
) {
let metric = metrics::ExperimentMetric::new(&self, experiment_id);
metric.set_active(&self, branch, extra);
}
/// Indicate that an experiment is no longer running.
///
/// ## Arguments
///
/// * `experiment_id` - The id of the active experiment to deactivate (maximum 30 bytes).
pub fn set_experiment_inactive(&self, experiment_id: String) {
let metric = metrics::ExperimentMetric::new(&self, experiment_id);
metric.set_inactive(&self);
}
/// **Test-only API (exported for FFI purposes).**
///
/// Check if an experiment is currently active.
///
/// ## Arguments
///
/// * `experiment_id` - The id of the experiment (maximum 30 bytes).
///
/// ## Return value
///
/// True if the experiment is active, false otherwise.
pub fn test_is_experiment_active(&self, experiment_id: String) -> bool {
self.test_get_experiment_data_as_json(experiment_id)
.is_some()
}
/// **Test-only API (exported for FFI purposes).**
///
/// Get stored data for the requested experiment.
///
/// ## Arguments
///
/// * `experiment_id` - The id of the active experiment (maximum 30 bytes).
///
/// ## Return value
///
/// If the requested experiment is active, a JSON string with the following format:
/// { 'branch': 'the-branch-name', 'extra': {'key': 'value', ...}}
/// Otherwise, None.
pub fn test_get_experiment_data_as_json(&self, experiment_id: String) -> Option<String> {
let metric = metrics::ExperimentMetric::new(&self, experiment_id);
metric.test_get_value_as_json_string(&self)
}
    /// **Test-only API (exported for FFI purposes).**
    ///
    /// Delete all stored metrics.
    /// Note that this also includes the ping sequence numbers, so it has
    /// the effect of resetting those to their initial values.
    pub fn test_clear_all_stores(&self) {
        self.data_store.clear_all();
        // We deliberately ignore a failure here: the event data may simply
        // not exist yet, and a best-effort clear is all tests need.
        let _ = self.event_data_store.clear_all();
    }
}
// Split unit tests to a separate file, to reduce the size of this one.
// (The `#[cfg(test)]` attribute was previously duplicated; once is enough.)
#[cfg(test)]
#[path = "lib_unit_tests.rs"]
mod tests;