re_data_loader/lib.rs
1//! Handles loading of Rerun data from file using data loader plugins.
2
3use std::sync::{Arc, LazyLock};
4
5use re_chunk::{Chunk, ChunkResult};
6use re_log_types::{ArrowMsg, EntityPath, LogMsg, RecordingId, StoreId, TimePoint};
7
8// ----------------------------------------------------------------------------
9
10mod load_file;
11mod loader_archetype;
12mod loader_directory;
13mod loader_rrd;
14mod loader_urdf;
15
16#[cfg(not(target_arch = "wasm32"))]
17pub mod lerobot;
18
19// This loader currently only works when loading the entire dataset directory, and we cannot do that on web yet.
20#[cfg(not(target_arch = "wasm32"))]
21pub mod loader_lerobot;
22
23// This loader currently uses native-only features under the hood, and we cannot do that on web yet.
24pub mod loader_mcap;
25
26#[cfg(not(target_arch = "wasm32"))]
27mod loader_external;
28
29pub use self::loader_mcap::McapLoader;
30
31pub use self::{
32 load_file::load_from_file_contents, loader_archetype::ArchetypeLoader,
33 loader_directory::DirectoryLoader, loader_rrd::RrdLoader, loader_urdf::UrdfDataLoader,
34 loader_urdf::UrdfTree,
35};
36
37#[cfg(not(target_arch = "wasm32"))]
38pub use self::{
39 load_file::load_from_path,
40 loader_external::{
41 EXTERNAL_DATA_LOADER_INCOMPATIBLE_EXIT_CODE, EXTERNAL_DATA_LOADER_PREFIX, ExternalLoader,
42 iter_external_loaders,
43 },
44 loader_lerobot::LeRobotDatasetLoader,
45};
46
47pub mod external {
48 pub use urdf_rs;
49}
50
51// ----------------------------------------------------------------------------
52
/// Recommended settings for the [`DataLoader`].
///
/// The loader is free to ignore some or all of these.
///
/// External [`DataLoader`]s will be passed the following CLI parameters
/// (see [`DataLoaderSettings::to_cli_args`]):
/// * `--application-id <application_id>` (if set)
/// * `--opened-application-id <opened_application_id>` (if `opened_store_id` is set)
/// * `--recording-id <recording_id>`
/// * `--opened-recording-id <opened_recording_id>` (if `opened_store_id` is set)
/// * `--entity-path-prefix <entity_path_prefix>` (if set)
/// * `--static` (if `timepoint` is set to the static timepoint)
/// * `--timeless` \[deprecated\] (if `timepoint` is set to the static timepoint)
/// * `--time_sequence <timeline1>=<seq1> <timeline2>=<seq2> ...` (if `timepoint` contains sequence data)
/// * `--time_duration_nanos <timeline1>=<duration1> <timeline2>=<duration2> ...` (if `timepoint` contains duration data) in nanos
/// * `--time_timestamp_nanos <timeline1>=<timestamp1> <timeline2>=<timestamp2> ...` (if `timepoint` contains timestamp data) in nanos since epoch
///
/// The legacy `--sequence` and `--time` flags are additionally emitted for backwards
/// compatibility.
#[derive(Debug, Clone)]
pub struct DataLoaderSettings {
    /// The recommended [`re_log_types::ApplicationId`] to log the data to, based on the surrounding context.
    pub application_id: Option<re_log_types::ApplicationId>,

    /// The recommended recording id to log the data to, based on the surrounding context.
    ///
    /// Log data to this recording if you want it to appear in a new recording shared by all
    /// data-loaders for the current loading session.
    pub recording_id: RecordingId,

    /// The [`re_log_types::StoreId`] that is currently opened in the viewer, if any.
    pub opened_store_id: Option<StoreId>,

    /// Whether `SetStoreInfo`s should be sent, regardless of the surrounding context.
    ///
    /// Only useful when creating a recording just-in-time directly in the viewer (which is what
    /// happens when importing things into the welcome screen).
    ///
    /// This setting is not forwarded to external loaders as a CLI flag.
    pub force_store_info: bool,

    /// What should the logged entity paths be prefixed with?
    pub entity_path_prefix: Option<EntityPath>,

    /// At what time(s) should the data be logged to?
    pub timepoint: Option<TimePoint>,
}
94
95impl DataLoaderSettings {
96 #[inline]
97 pub fn recommended(recording_id: impl Into<RecordingId>) -> Self {
98 Self {
99 application_id: Default::default(),
100 recording_id: recording_id.into(),
101 opened_store_id: Default::default(),
102 force_store_info: false,
103 entity_path_prefix: Default::default(),
104 timepoint: Default::default(),
105 }
106 }
107
108 /// Returns the recommended [`re_log_types::StoreId`] to log the data to.
109 pub fn recommended_store_id(&self) -> StoreId {
110 StoreId::recording(
111 self.application_id
112 .clone()
113 .unwrap_or_else(re_log_types::ApplicationId::random),
114 self.recording_id.clone(),
115 )
116 }
117
118 /// Returns the currently opened [`re_log_types::StoreId`] if any. Otherwise, returns the
119 /// recommended store id.
120 pub fn opened_store_id_or_recommended(&self) -> StoreId {
121 self.opened_store_id
122 .clone()
123 .unwrap_or_else(|| self.recommended_store_id())
124 }
125
126 /// Generates CLI flags from these settings, for external data loaders.
127 pub fn to_cli_args(&self) -> Vec<String> {
128 let Self {
129 application_id,
130 recording_id,
131 opened_store_id,
132 force_store_info: _,
133 entity_path_prefix,
134 timepoint,
135 } = self;
136
137 let mut args = Vec::new();
138
139 if let Some(application_id) = application_id {
140 args.extend(["--application-id".to_owned(), format!("{application_id}")]);
141 }
142 args.extend(["--recording-id".to_owned(), format!("{recording_id}")]);
143
144 if let Some(opened_store_id) = opened_store_id {
145 args.extend([
146 "--opened-application-id".to_owned(),
147 format!("{}", opened_store_id.application_id()),
148 ]);
149
150 args.extend([
151 "--opened-recording-id".to_owned(),
152 format!("{}", opened_store_id.recording_id()),
153 ]);
154 }
155
156 if let Some(entity_path_prefix) = entity_path_prefix {
157 args.extend([
158 "--entity-path-prefix".to_owned(),
159 format!("{entity_path_prefix}"),
160 ]);
161 }
162
163 if let Some(timepoint) = timepoint {
164 if timepoint.is_static() {
165 args.push("--timeless".to_owned()); // for backwards compatibility
166 args.push("--static".to_owned());
167 }
168
169 for (timeline, cell) in timepoint.iter() {
170 match cell.typ() {
171 re_log_types::TimeType::Sequence => {
172 args.extend([
173 "--time_sequence".to_owned(),
174 format!("{timeline}={}", cell.value),
175 ]);
176
177 // for backwards compatibility:
178 args.extend([
179 "--sequence".to_owned(),
180 format!("{timeline}={}", cell.value),
181 ]);
182 }
183 re_log_types::TimeType::DurationNs => {
184 args.extend([
185 "--time_duration_nanos".to_owned(),
186 format!("{timeline}={}", cell.value),
187 ]);
188
189 // for backwards compatibility:
190 args.extend(["--time".to_owned(), format!("{timeline}={}", cell.value)]);
191 }
192 re_log_types::TimeType::TimestampNs => {
193 args.extend([
194 "--time_duration_nanos".to_owned(),
195 format!("{timeline}={}", cell.value),
196 ]);
197
198 // for backwards compatibility:
199 args.extend([
200 "--sequence".to_owned(),
201 format!("{timeline}={}", cell.value),
202 ]);
203 }
204 }
205 }
206 }
207
208 args
209 }
210}
211
/// Name of a [`DataLoader`], as returned by [`DataLoader::name`].
pub type DataLoaderName = String;
213
/// A [`DataLoader`] loads data from a file path and/or a file's contents.
///
/// Files can be loaded in 3 different ways:
/// - via the Rerun CLI (`rerun myfile.jpeg`),
/// - using drag-and-drop,
/// - using the open dialog in the Rerun Viewer.
///
/// All these file loading methods support loading a single file, many files at once, or even
/// folders.
/// ⚠ Drag-and-drop of folders does not yet work on the web version of Rerun Viewer ⚠
///
/// We only support loading files from the local filesystem at the moment, and consequently only
/// accept filepaths as input.
/// [There are plans to make this generic over any URI](https://github.com/rerun-io/rerun/issues/4525).
///
/// Rerun comes with a few [`DataLoader`]s by default:
/// - [`RrdLoader`] for [Rerun files].
/// - [`ArchetypeLoader`] for:
///     - [3D models]
///     - [Images]
///     - [Point clouds]
///     - [Text files]
/// - [`DirectoryLoader`] for recursively loading folders.
/// - [`McapLoader`] and [`UrdfDataLoader`].
/// - [`LeRobotDatasetLoader`] (native only).
/// - [`ExternalLoader`] (native only), which looks for user-defined data loaders in $PATH.
///
/// ## Registering custom loaders
///
/// Check out our [guide](https://www.rerun.io/docs/reference/data-loaders/overview).
///
/// ## Execution
///
/// **All** known [`DataLoader`]s get called when a user tries to open a file, unconditionally.
/// This gives [`DataLoader`]s maximum flexibility to decide what files they are interested in, as
/// opposed to e.g. only being able to look at files' extensions.
///
/// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible
/// with a [`DataLoaderError::Incompatible`] error.
///
/// Iff all [`DataLoader`]s (including custom and external ones) return with a [`DataLoaderError::Incompatible`]
/// error, the Viewer will show an error message to the user indicating that the file type is not
/// supported.
///
/// On native, [`DataLoader`]s are executed in parallel.
///
/// [Rerun files]: crate::SUPPORTED_RERUN_EXTENSIONS
/// [3D models]: crate::SUPPORTED_MESH_EXTENSIONS
/// [Images]: crate::SUPPORTED_IMAGE_EXTENSIONS
/// [Point clouds]: crate::SUPPORTED_POINT_CLOUD_EXTENSIONS
/// [Text files]: crate::SUPPORTED_TEXT_EXTENSIONS
//
// TODO(#4525): `DataLoader`s should support arbitrary URIs
// TODO(#4527): Web Viewer `?url` parameter should accept anything our `DataLoader`s support
pub trait DataLoader: Send + Sync {
    /// Name of the [`DataLoader`].
    ///
    /// Should be globally unique.
    fn name(&self) -> DataLoaderName;

    /// Loads data from a file on the local filesystem and sends it to `tx`.
    ///
    /// This is generally called when opening files with the Rerun CLI or via the open menu in the
    /// Rerun Viewer on native platforms.
    ///
    /// The `recording_id` passed in via `settings` refers to a shared recording created by the
    /// file loading machinery: implementers can decide to use it or not (e.g. it might make sense
    /// to log all images with a similar name in a shared recording, while an rrd file is already
    /// its own recording).
    ///
    /// `path` isn't necessarily a _file_ path, but can be a directory as well: implementers are
    /// free to handle that however they decide.
    ///
    /// ## Error handling
    ///
    /// Most implementers of `load_from_path` are expected to be asynchronous in nature.
    ///
    /// Asynchronous implementers should make sure to fail early (and thus synchronously) when
    /// possible (e.g. didn't even manage to open the file).
    /// Otherwise, they should log errors that happen in an asynchronous context.
    ///
    /// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible
    /// with a [`DataLoaderError::Incompatible`] error.
    #[cfg(not(target_arch = "wasm32"))]
    fn load_from_path(
        &self,
        settings: &DataLoaderSettings,
        path: std::path::PathBuf,
        tx: std::sync::mpsc::Sender<LoadedData>,
    ) -> Result<(), DataLoaderError>;

    /// Loads data from in-memory file contents and sends it to `tx`.
    ///
    /// This is generally called when opening files via drag-and-drop or when using the web viewer.
    ///
    /// The `recording_id` passed in via `settings` refers to a shared recording created by the
    /// file loading machinery: implementers can decide to use it or not (e.g. it might make sense
    /// to log all images with a similar name in a shared recording, while an rrd file is already
    /// its own recording).
    ///
    /// The `filepath` of the file is given for informational purposes (e.g. to extract the file's
    /// extension): implementers should _not_ try to read from disk as there likely isn't a
    /// filesystem available to begin with.
    /// `filepath` is guaranteed to be a file path.
    ///
    /// When running on the web (wasm), `filepath` only contains the file name.
    ///
    /// ## Error handling
    ///
    /// Most implementers of `load_from_file_contents` are expected to be asynchronous in nature.
    ///
    /// Asynchronous implementers should make sure to fail early (and thus synchronously) when
    /// possible (e.g. didn't even manage to open the file).
    /// Otherwise, they should log errors that happen in an asynchronous context.
    ///
    /// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible
    /// with a [`DataLoaderError::Incompatible`] error.
    fn load_from_file_contents(
        &self,
        settings: &DataLoaderSettings,
        filepath: std::path::PathBuf,
        contents: std::borrow::Cow<'_, [u8]>,
        tx: std::sync::mpsc::Sender<LoadedData>,
    ) -> Result<(), DataLoaderError>;
}
335
/// Errors that might happen when loading data through a [`DataLoader`].
#[derive(thiserror::Error, Debug)]
pub enum DataLoaderError {
    /// An I/O error (only available on native targets).
    #[cfg(not(target_arch = "wasm32"))]
    #[error(transparent)]
    IO(#[from] std::io::Error),

    /// An error reported by `arrow`.
    #[error(transparent)]
    Arrow(#[from] arrow::error::ArrowError),

    /// An error reported by `re_chunk`.
    #[error(transparent)]
    Chunk(#[from] re_chunk::ChunkError),

    /// An error reported by the `re_log_encoding` decoder.
    #[error(transparent)]
    Decode(#[from] re_log_encoding::decoder::DecodeError),

    /// The [`DataLoader`] has no interest in the file at the given path.
    #[error("No data-loader support for {0:?}")]
    Incompatible(std::path::PathBuf),

    /// An error reported by the `mcap` crate.
    #[error(transparent)]
    Mcap(#[from] ::mcap::McapError),

    /// Any other error; displayed through `re_error::format`.
    #[error("{}", re_error::format(.0))]
    Other(#[from] anyhow::Error),
}
361
362impl DataLoaderError {
363 #[inline]
364 pub fn is_path_not_found(&self) -> bool {
365 match self {
366 #[cfg(not(target_arch = "wasm32"))]
367 Self::IO(err) => err.kind() == std::io::ErrorKind::NotFound,
368 _ => false,
369 }
370 }
371
372 #[inline]
373 pub fn is_incompatible(&self) -> bool {
374 matches!(self, Self::Incompatible { .. })
375 }
376}
377
/// What [`DataLoader`]s load.
///
/// This makes it trivial for [`DataLoader`]s to build the data in whatever form is
/// most convenient for them, whether it is raw components, arrow chunks or even
/// full-on [`LogMsg`]s.
///
/// The first field of every variant is the name of the [`DataLoader`] that produced the data.
pub enum LoadedData {
    /// A [`Chunk`], together with the store it should be logged to.
    Chunk(DataLoaderName, re_log_types::StoreId, Chunk),

    /// An [`ArrowMsg`], together with the store it should be logged to.
    ArrowMsg(DataLoaderName, re_log_types::StoreId, ArrowMsg),

    /// A complete [`LogMsg`].
    LogMsg(DataLoaderName, LogMsg),
}
388
389impl LoadedData {
390 /// Returns the name of the [`DataLoader`] that generated this data.
391 #[inline]
392 pub fn data_loader_name(&self) -> &DataLoaderName {
393 match self {
394 Self::Chunk(name, ..) | Self::ArrowMsg(name, ..) | Self::LogMsg(name, ..) => name,
395 }
396 }
397
398 /// Pack the data into a [`LogMsg`].
399 #[inline]
400 pub fn into_log_msg(self) -> ChunkResult<LogMsg> {
401 match self {
402 Self::Chunk(_name, store_id, chunk) => {
403 Ok(LogMsg::ArrowMsg(store_id, chunk.to_arrow_msg()?))
404 }
405
406 Self::ArrowMsg(_name, store_id, msg) => Ok(LogMsg::ArrowMsg(store_id, msg)),
407
408 Self::LogMsg(_name, msg) => Ok(msg),
409 }
410 }
411}
412
413// ----------------------------------------------------------------------------
414
/// Keeps track of all builtin [`DataLoader`]s.
///
/// Lazily initialized the first time it is accessed (see [`iter_loaders`]).
static BUILTIN_LOADERS: LazyLock<Vec<Arc<dyn DataLoader>>> = LazyLock::new(|| {
    vec![
        Arc::new(RrdLoader) as Arc<dyn DataLoader>,
        Arc::new(ArchetypeLoader),
        Arc::new(DirectoryLoader),
        Arc::new(McapLoader::default()),
        // The two loaders below are only compiled on native targets.
        #[cfg(not(target_arch = "wasm32"))]
        Arc::new(LeRobotDatasetLoader),
        #[cfg(not(target_arch = "wasm32"))]
        Arc::new(ExternalLoader),
        Arc::new(UrdfDataLoader),
    ]
});
431
432/// Iterator over all registered [`DataLoader`]s.
433#[inline]
434pub fn iter_loaders() -> impl Iterator<Item = Arc<dyn DataLoader>> {
435 BUILTIN_LOADERS
436 .clone()
437 .into_iter()
438 .chain(CUSTOM_LOADERS.read().clone())
439}
440
/// Keeps track of all custom [`DataLoader`]s.
///
/// Starts out empty. Use [`register_custom_data_loader`] to add new loaders.
static CUSTOM_LOADERS: LazyLock<parking_lot::RwLock<Vec<Arc<dyn DataLoader>>>> =
    LazyLock::new(parking_lot::RwLock::default);
446
447/// Register a custom [`DataLoader`].
448///
449/// Any time the Rerun Viewer opens a file or directory, this custom loader will be notified.
450/// Refer to [`DataLoader`]'s documentation for more information.
451#[inline]
452pub fn register_custom_data_loader(loader: impl DataLoader + 'static) {
453 CUSTOM_LOADERS.write().push(Arc::new(loader));
454}
455
456// ----------------------------------------------------------------------------
457
/// Returns the ASCII-lowercased extension of `path`, without the leading period.
///
/// Empty string if no extension. Non-UTF-8 bytes are replaced lossily.
#[inline]
pub(crate) fn extension(path: &std::path::Path) -> String {
    match path.extension() {
        Some(ext) => ext.to_string_lossy().to_ascii_lowercase(),
        None => String::new(),
    }
}
467
468// ----------------------------------------------------------------------------
469
/// Image file extensions with builtin support.
// …given that all feature flags are turned on for the `image` crate.
pub const SUPPORTED_IMAGE_EXTENSIONS: &[&str] = &[
    "avif", "bmp", "dds", "exr", "farbfeld", "ff", "gif", "hdr", "ico", "jpeg", "jpg", "pam",
    "pbm", "pgm", "png", "ppm", "tga", "tif", "tiff", "webp",
];

/// Video file extensions with builtin support.
pub const SUPPORTED_VIDEO_EXTENSIONS: &[&str] = &["mp4"];

/// 3D model file extensions with builtin support.
pub const SUPPORTED_MESH_EXTENSIONS: &[&str] = &["glb", "gltf", "obj", "stl"];

/// Point cloud file extensions with builtin support.
// TODO(#4532): `.ply` data loader should support 2D point cloud & meshes
pub const SUPPORTED_POINT_CLOUD_EXTENSIONS: &[&str] = &["ply"];

/// Rerun file extensions with builtin support.
pub const SUPPORTED_RERUN_EXTENSIONS: &[&str] = &["rbl", "rrd"];

/// 3rd party formats with built-in support.
pub const SUPPORTED_THIRD_PARTY_FORMATS: &[&str] = &["mcap"];

/// Text file extensions with builtin support.
// TODO(#4555): Add catch-all builtin `DataLoader` for text files
pub const SUPPORTED_TEXT_EXTENSIONS: &[&str] = &["txt", "md"];
490
491/// All file extension supported by our builtin [`DataLoader`]s.
492pub fn supported_extensions() -> impl Iterator<Item = &'static str> {
493 SUPPORTED_RERUN_EXTENSIONS
494 .iter()
495 .chain(SUPPORTED_THIRD_PARTY_FORMATS)
496 .chain(SUPPORTED_IMAGE_EXTENSIONS)
497 .chain(SUPPORTED_VIDEO_EXTENSIONS)
498 .chain(SUPPORTED_MESH_EXTENSIONS)
499 .chain(SUPPORTED_POINT_CLOUD_EXTENSIONS)
500 .chain(SUPPORTED_TEXT_EXTENSIONS)
501 .copied()
502}
503
504/// Is this a supported file extension by any of our builtin [`DataLoader`]s?
505pub fn is_supported_file_extension(extension: &str) -> bool {
506 debug_assert!(
507 !extension.starts_with('.'),
508 "Expected extension without period, but got {extension:?}"
509 );
510 let extension = extension.to_lowercase();
511 supported_extensions().any(|ext| ext == extension)
512}
513
#[test]
fn test_supported_extensions() {
    // One representative per family: Rerun files, 3rd party formats, images.
    for ext in ["rrd", "mcap", "png"] {
        assert!(
            is_supported_file_extension(ext),
            "expected {ext:?} to be a supported extension"
        );
    }
}