Skip to main content

apollo_router/orbiter/
mod.rs

1use std::collections::HashMap;
2use std::collections::HashSet;
3use std::str::FromStr;
4use std::sync::Arc;
5use std::time::Duration;
6
7use async_trait::async_trait;
8use clap::CommandFactory;
9use http::header::CONTENT_TYPE;
10use http::header::USER_AGENT;
11use jsonpath_rust::JsonPathInst;
12use mime::APPLICATION_JSON;
13use once_cell::sync::OnceCell;
14use serde::Serialize;
15use serde_json::Map;
16use serde_json::Value;
17use tower::BoxError;
18use uuid::Uuid;
19
20use crate::Configuration;
21use crate::configuration::generate_config_schema;
22use crate::executable::Opt;
23use crate::plugin::DynPlugin;
24use crate::router_factory::RouterSuperServiceFactory;
25use crate::router_factory::YamlRouterFactory;
26use crate::services::HasSchema;
27use crate::services::router::service::RouterCreator;
28use crate::spec::Schema;
29use crate::uplink::license_enforcement::LicenseState;
30
/// Session id shared by every usage report built by this process.
/// It is generated lazily (a random v4 UUID) the first time a report is created and then reused,
/// so it persists between config reloads and supergraph schema changes.
static SESSION_ID: OnceCell<Uuid> = OnceCell::new();
33
/// Platform represents the platform the router is being run on.
/// It is serialized as the `platform` field of [`UsageReport`].
#[derive(Debug, Serialize)]
struct Platform {
    /// The operating system name: "wsl" when WSL is detected, otherwise the value of
    /// `std::env::consts::OS` (see `get_os`).
    os: String,

    /// The CI vendor reported by `ci_info`, if we think the router is being run in CI.
    continuous_integration: Option<ci_info::types::Vendor>,
}
43
/// A usage report for the router.
/// This is the JSON document that gets posted to the telemetry endpoint.
#[derive(Serialize)]
struct UsageReport {
    /// A random ID that is generated on first startup of the Router. It is not persistent between restarts of the Router, but will be persistent for hot reloads
    session_id: Uuid,
    /// The version of the Router (taken from `CARGO_PKG_VERSION` at build time)
    version: String,
    /// Information about the current architecture/platform
    platform: Platform,
    /// Information about what was being used: counters keyed by a dotted path such as
    /// `configuration.…` or `args.…`
    usage: Map<String, Value>,
}
56
57impl OrbiterRouterSuperServiceFactory {
58    pub(crate) fn new(delegate: YamlRouterFactory) -> OrbiterRouterSuperServiceFactory {
59        OrbiterRouterSuperServiceFactory { delegate }
60    }
61}
62
/// A service factory that will report some anonymous telemetry to Apollo. It can be disabled by users, but the data is useful for helping us to decide where to spend our efforts.
/// The data sent looks something like this (the shape follows the `UsageReport` struct):
/// ```json
/// {
///   "session_id": "fbe09da3-ebdb-4863-8086-feb97464b8d7",
///   "version": "1.4.0", // The version of the router
///   "platform": {
///     "os": "linux",
///     "continuous_integration": null
///   },
///   "usage": {
///     "configuration.headers.all.request.propagate.named.<redacted>": 3,
///     "configuration.headers.all.request.propagate.default.<redacted>": 1,
///     "configuration.headers.all.request.len": 3,
///     "configuration.headers.subgraphs.<redacted>.request.propagate.named.<redacted>": 2,
///     "configuration.headers.subgraphs.<redacted>.request.len": 2,
///     "configuration.headers.subgraphs.len": 1,
///     "configuration.homepage.enabled.true": 1,
///     "args.config-path.<redacted>": 1,
///     "args.hot-reload.true": 1,
///     //Many more keys. This is dynamic and will change over time.
///     //More...
///     //More...
///     //More...
///   }
/// }
/// ```
#[derive(Default)]
pub(crate) struct OrbiterRouterSuperServiceFactory {
    /// The factory that actually builds the router; this type only adds reporting around it.
    delegate: YamlRouterFactory,
}
92
93#[async_trait]
94impl RouterSuperServiceFactory for OrbiterRouterSuperServiceFactory {
95    type RouterFactory = RouterCreator;
96
97    async fn create<'a>(
98        &'a mut self,
99        is_telemetry_disabled: bool,
100        configuration: Arc<Configuration>,
101        schema: Arc<Schema>,
102        previous_router: Option<&'a Self::RouterFactory>,
103        extra_plugins: Option<Vec<(String, Box<dyn DynPlugin>)>>,
104        license: Arc<LicenseState>,
105    ) -> Result<Self::RouterFactory, BoxError> {
106        self.delegate
107            .create(
108                is_telemetry_disabled,
109                configuration.clone(),
110                schema.clone(),
111                previous_router,
112                extra_plugins,
113                license,
114            )
115            .await
116            .inspect(|factory| {
117                if !is_telemetry_disabled {
118                    let schema = factory.supergraph_creator.schema();
119
120                    tokio::task::spawn(async move {
121                        tracing::debug!("sending anonymous usage data to Apollo");
122                        let report = create_report(configuration, schema);
123                        if let Err(e) = send(report).await {
124                            tracing::debug!("failed to send usage report: {}", e);
125                        }
126                    });
127                }
128            })
129    }
130}
131
132fn create_report(configuration: Arc<Configuration>, _schema: Arc<Schema>) -> UsageReport {
133    let mut configuration: Value = configuration
134        .validated_yaml
135        .clone()
136        .unwrap_or_else(|| Value::Object(Default::default()));
137    let os = get_os();
138    let mut usage = HashMap::new();
139
140    // We only report apollo plugins. This way we don't risk leaking sensitive data if the user has customized the router and added their own plugins.
141    usage.insert(
142        "configuration.plugins.len".to_string(),
143        configuration
144            .get("plugins")
145            .and_then(|plugins| plugins.as_array())
146            .map(|plugins| plugins.len())
147            .unwrap_or_default() as u64,
148    );
149
150    // Make sure the config is an object, but don't fail if it wasn't
151    if !configuration.is_object() {
152        configuration = Value::Object(Default::default());
153    }
154
155    // Delete the plugins block so that we don't report on it.
156    // A custom plugin may have configuration that is sensitive.
157    configuration
158        .as_object_mut()
159        .expect("configuration should have been an object")
160        .remove("plugins");
161
162    // Visit the config
163    visit_config(&mut usage, &configuration);
164
165    // Check the command line options. This encapsulates both env and command line functionality
166    // This won't work in tests so we have separate test code.
167    #[cfg(not(test))]
168    visit_args(&mut usage, std::env::args().collect());
169
170    UsageReport {
171        session_id: *SESSION_ID.get_or_init(Uuid::new_v4),
172        version: std::env!("CARGO_PKG_VERSION").to_string(),
173        platform: Platform {
174            os,
175            continuous_integration: ci_info::get().vendor,
176        },
177        usage: usage
178            .into_iter()
179            .map(|(k, v)| (k, Value::Number(v.into())))
180            .collect(),
181    }
182}
183
184fn visit_args(usage: &mut HashMap<String, u64>, args: Vec<String>) {
185    let matches = Opt::command().get_matches_from(args);
186
187    Opt::command().get_arguments().for_each(|a| {
188        let defaults = a.get_default_values().to_vec();
189        if let Some(values) = matches.get_raw(a.get_id().as_str()) {
190            let values = values.collect::<Vec<_>>();
191
192            // First check booleans, then only record if the value differed from the default
193            if values == ["true"] || values == ["false"] {
194                if values == ["true"] {
195                    usage.insert(format!("args.{}.true", a.get_id()), 1);
196                }
197            } else if defaults != values {
198                usage.insert(format!("args.{}.<redacted>", a.get_id()), 1);
199            }
200        }
201    });
202}
203
204async fn send(body: UsageReport) -> Result<String, BoxError> {
205    tracing::debug!(
206        "transmitting anonymous analytics: {}",
207        serde_json::to_string_pretty(&body)?
208    );
209
210    #[cfg(not(test))]
211    let url = "https://router.apollo.dev/telemetry";
212    #[cfg(test)]
213    let url = "http://localhost:8888/telemetry";
214
215    Ok(reqwest::Client::new()
216        .post(url)
217        .header(USER_AGENT, "router")
218        .header(CONTENT_TYPE, APPLICATION_JSON.essence_str())
219        .json(&serde_json::to_value(body)?)
220        .timeout(Duration::from_secs(10))
221        .send()
222        .await?
223        .text()
224        .await?)
225}
226
227fn get_os() -> String {
228    if wsl::is_wsl() {
229        "wsl"
230    } else {
231        std::env::consts::OS
232    }
233    .to_string()
234}
235
236fn visit_config(usage: &mut HashMap<String, u64>, config: &Value) {
237    // We have to be careful not to expose names of headers, metadata or anything else sensitive.
238    let raw_json_schema =
239        serde_json::to_value(generate_config_schema()).expect("config schema must be valid");
240    // We can't use json schema to redact the config as we don't have the annotations.
241    // Instead, we get the set of properties from the schema and anything that doesn't match a property is redacted.
242    let path = JsonPathInst::from_str("$..properties").expect("properties path must be valid");
243    let slice = path.find_slice(&raw_json_schema);
244    let schema_properties: HashSet<String> = slice
245        .iter()
246        .filter_map(|v| v.as_object())
247        .flat_map(|o| o.keys())
248        .map(|s| s.to_string())
249        .collect();
250
251    // Now for each leaf in the config we get the path and redact anything that isn't in the schema.
252    visit_value(&schema_properties, usage, config, "");
253}
254
255fn visit_value(
256    schema_properties: &HashSet<String>,
257    usage: &mut HashMap<String, u64>,
258    value: &Value,
259    path: &str,
260) {
261    match value {
262        Value::Bool(value) => {
263            *usage
264                .entry(format!("configuration.{path}.{value}"))
265                .or_default() += 1;
266        }
267        Value::Number(value) => {
268            *usage
269                .entry(format!("configuration.{path}.{value}"))
270                .or_default() += 1;
271        }
272        Value::String(_) => {
273            // Strings are never output
274            *usage
275                .entry(format!("configuration.{path}.<redacted>"))
276                .or_default() += 1;
277        }
278        Value::Object(o) => {
279            for (key, value) in o {
280                let key = if schema_properties.contains(key) {
281                    key
282                } else {
283                    "<redacted>"
284                };
285
286                if path.is_empty() {
287                    visit_value(schema_properties, usage, value, key);
288                } else {
289                    visit_value(schema_properties, usage, value, &format!("{path}.{key}"));
290                    *usage
291                        .entry(format!("configuration.{path}.{key}.len"))
292                        .or_default() += 1;
293                }
294            }
295        }
296        Value::Array(a) => {
297            for value in a {
298                visit_value(schema_properties, usage, value, path);
299            }
300            *usage
301                .entry(format!("configuration.{path}.array.len"))
302                .or_default() += a.len() as u64;
303        }
304        Value::Null => {}
305    }
306}
307
308#[cfg(test)]
309mod test {
310    use std::collections::HashMap;
311    use std::env;
312    use std::str::FromStr;
313    use std::sync::Arc;
314
315    use insta::assert_yaml_snapshot;
316    use serde_json::Value;
317    use serde_json::json;
318
319    use crate::Configuration;
320    use crate::configuration::ConfigurationError;
321    use crate::orbiter::create_report;
322    use crate::orbiter::visit_args;
323    use crate::orbiter::visit_config;
324
325    #[test]
326    fn test_visit_args() {
327        let mut usage = HashMap::new();
328        visit_args(
329            &mut usage,
330            ["router", "--config", "a", "--hot-reload"]
331                .into_iter()
332                .map(|a| a.to_string())
333                .collect(),
334        );
335        usage.remove("args.anonymous_telemetry_disabled.true");
336        usage.remove("args.apollo_graph_ref.<redacted>");
337        usage.remove("args.apollo_key.<redacted>");
338        insta::with_settings!({sort_maps => true}, {
339            assert_yaml_snapshot!(usage);
340        });
341    }
342
343    // The following two tests are ignored because since allowing refs in schema we can no longer
344    // examine the annotations for redaction.
345    // https://github.com/Stranger6667/jsonschema-rs/issues/403
346    // We should remove the orbiter code and move to otel for both anonymous and non-anonymous telemetry.
347    #[test]
348    fn test_visit_config() {
349        let config = Configuration::from_str(include_str!("testdata/redaction.router.yaml"))
350            .expect("yaml must be valid");
351        let mut usage = HashMap::new();
352        visit_config(
353            &mut usage,
354            config
355                .validated_yaml
356                .as_ref()
357                .expect("config should have had validated_yaml"),
358        );
359        insta::with_settings!({sort_maps => true}, {
360            assert_yaml_snapshot!(usage);
361        });
362    }
363
364    #[test]
365    fn test_visit_config_that_needed_upgrade() {
366        let result: ConfigurationError =
367            Configuration::from_str("supergraph:\n  preview_defer_support: true")
368                .expect_err("expected an error");
369        // Note: Can't implement PartialEq on ConfigurationError, so...
370        let err_message = "configuration had errors";
371        let err_error = "\n1. at line 2\n\n  supergraph:\nā”Œ   preview_defer_support: true\nā””-----> Additional properties are not allowed ('preview_defer_support' was unexpected)\n\n".to_string();
372        matches!(result, ConfigurationError::InvalidConfiguration {message, error} if err_message == message && err_error == error);
373    }
374
375    #[test]
376    fn test_create_report() {
377        let config = Configuration::from_str(include_str!("testdata/redaction.router.yaml"))
378            .expect("config must be valid");
379        let schema_string = include_str!("../testdata/minimal_supergraph.graphql");
380        let schema = crate::spec::Schema::parse(schema_string, &Default::default()).unwrap();
381        let report = create_report(Arc::new(config), Arc::new(schema));
382        insta::with_settings!({sort_maps => true}, {
383                    assert_yaml_snapshot!(report, {
384                ".version" => "[version]",
385                ".session_id" => "[session_id]",
386                ".platform.os" => "[os]",
387                ".platform.continuous_integration" => "[ci]",
388            });
389        });
390    }
391
392    #[test]
393    fn test_create_report_incorrect_type_validated_yaml() {
394        let mut config = Configuration::from_str(include_str!("testdata/redaction.router.yaml"))
395            .expect("config must be valid");
396        config.validated_yaml = Some(Value::Null);
397        let schema_string = include_str!("../testdata/minimal_supergraph.graphql");
398        let schema = crate::spec::Schema::parse(schema_string, &Default::default()).unwrap();
399        let report = create_report(Arc::new(config), Arc::new(schema));
400        insta::with_settings!({sort_maps => true}, {
401                    assert_yaml_snapshot!(report, {
402                ".version" => "[version]",
403                ".session_id" => "[session_id]",
404                ".platform.os" => "[os]",
405                ".platform.continuous_integration" => "[ci]",
406            });
407        });
408    }
409
410    #[test]
411    fn test_create_report_invalid_validated_yaml() {
412        let mut config = Configuration::from_str(include_str!("testdata/redaction.router.yaml"))
413            .expect("config must be valid");
414        config.validated_yaml = Some(json!({"garbage": "garbage"}));
415        let schema_string = include_str!("../testdata/minimal_supergraph.graphql");
416        let schema = crate::spec::Schema::parse(schema_string, &Default::default()).unwrap();
417        let report = create_report(Arc::new(config), Arc::new(schema));
418        insta::with_settings!({sort_maps => true}, {
419                    assert_yaml_snapshot!(report, {
420                ".version" => "[version]",
421                ".session_id" => "[session_id]",
422                ".platform.os" => "[os]",
423                ".platform.continuous_integration" => "[ci]",
424            });
425        });
426    }
427
428    // TODO, enable once we are live.
429    // #[test]
430    // fn test_send() {
431    //     let response = send(UsageReport {
432    //         session_id: Uuid::from_str("433c123c-8dba-11ed-a1eb-0242ac120002").expect("uuid"),
433    //         version: "session2".to_string(),
434    //         platform: Platform {
435    //             os: "test".to_string(),
436    //             continuous_integration: Some(Vendor::CircleCI),
437    //         },
438    //         usage: Default::default(),
439    //     })
440    //     .expect("expected send to succeed");
441    //
442    //     assert_eq!(response, "Report received");
443    // }
444}