re_data_loader/
lib.rs

1//! Handles loading of Rerun data from file using data loader plugins.
2
3use std::sync::{Arc, LazyLock};
4
5use re_chunk::{Chunk, ChunkResult};
6use re_log_types::{ArrowMsg, EntityPath, LogMsg, RecordingId, StoreId, TimePoint};
7
8// ----------------------------------------------------------------------------
9
10mod load_file;
11mod loader_archetype;
12mod loader_directory;
13mod loader_rrd;
14mod loader_urdf;
15
16#[cfg(not(target_arch = "wasm32"))]
17pub mod lerobot;
18
19// This loader currently only works when loading the entire dataset directory, and we cannot do that on web yet.
20#[cfg(not(target_arch = "wasm32"))]
21pub mod loader_lerobot;
22
23// This loader currently uses native-only features under the hood, and we cannot do that on web yet.
24pub mod loader_mcap;
25
26#[cfg(not(target_arch = "wasm32"))]
27mod loader_external;
28
29pub use self::load_file::load_from_file_contents;
30pub use self::loader_archetype::ArchetypeLoader;
31pub use self::loader_directory::DirectoryLoader;
32pub use self::loader_mcap::McapLoader;
33pub use self::loader_rrd::RrdLoader;
34pub use self::loader_urdf::{UrdfDataLoader, UrdfTree};
35#[cfg(not(target_arch = "wasm32"))]
36pub use self::{
37    load_file::load_from_path,
38    loader_external::{
39        EXTERNAL_DATA_LOADER_INCOMPATIBLE_EXIT_CODE, EXTERNAL_DATA_LOADER_PREFIX, ExternalLoader,
40        iter_external_loaders,
41    },
42    loader_lerobot::LeRobotDatasetLoader,
43};
44
45pub mod external {
46    pub use urdf_rs;
47}
48
49// ----------------------------------------------------------------------------
50
/// Recommended settings for the [`DataLoader`].
///
/// The loader is free to ignore some or all of these.
///
/// External [`DataLoader`]s will be passed the following CLI parameters
/// (see [`DataLoaderSettings::to_cli_args`]):
/// * `--application-id <application_id>` (if set)
/// * `--opened-application-id <opened_application_id>` (if `opened_store_id` is set)
/// * `--recording-id <recording_id>`
/// * `--opened-recording-id <opened_recording_id>` (if `opened_store_id` is set)
/// * `--entity-path-prefix <entity_path_prefix>` (if set)
/// * `--static` (if `timepoint` is set and static)
/// * `--timeless` \[deprecated\] (if `timepoint` is set and static)
/// * `--time_sequence <timeline>=<seq>` (once per timeline in `timepoint` that contains sequence data)
/// * `--time_duration_nanos <timeline>=<duration>` (once per timeline in `timepoint` that contains duration data) in nanos
/// * `--time_timestamp_nanos <timeline>=<timestamp>` (once per timeline in `timepoint` that contains timestamp data) in nanos since epoch
#[derive(Debug, Clone)]
pub struct DataLoaderSettings {
    /// The recommended [`re_log_types::ApplicationId`] to log the data to, based on the surrounding context.
    pub application_id: Option<re_log_types::ApplicationId>,

    /// The recommended recording id to log the data to, based on the surrounding context.
    ///
    /// Log data to this recording if you want it to appear in a new recording shared by all
    /// data-loaders for the current loading session.
    pub recording_id: RecordingId,

    /// The [`re_log_types::StoreId`] that is currently opened in the viewer, if any.
    pub opened_store_id: Option<StoreId>,

    /// Whether `SetStoreInfo`s should be sent, regardless of the surrounding context.
    ///
    /// Only useful when creating a recording just-in-time directly in the viewer (which is what
    /// happens when importing things into the welcome screen).
    ///
    /// This setting is not forwarded to external loaders as a CLI flag.
    pub force_store_info: bool,

    /// What should the logged entity paths be prefixed with?
    pub entity_path_prefix: Option<EntityPath>,

    /// At what time(s) should the data be logged to?
    pub timepoint: Option<TimePoint>,
}
92
93impl DataLoaderSettings {
94    #[inline]
95    pub fn recommended(recording_id: impl Into<RecordingId>) -> Self {
96        Self {
97            application_id: Default::default(),
98            recording_id: recording_id.into(),
99            opened_store_id: Default::default(),
100            force_store_info: false,
101            entity_path_prefix: Default::default(),
102            timepoint: Default::default(),
103        }
104    }
105
106    /// Returns the recommended [`re_log_types::StoreId`] to log the data to.
107    pub fn recommended_store_id(&self) -> StoreId {
108        StoreId::recording(
109            self.application_id
110                .clone()
111                .unwrap_or_else(re_log_types::ApplicationId::random),
112            self.recording_id.clone(),
113        )
114    }
115
116    /// Returns the currently opened [`re_log_types::StoreId`] if any. Otherwise, returns the
117    /// recommended store id.
118    pub fn opened_store_id_or_recommended(&self) -> StoreId {
119        self.opened_store_id
120            .clone()
121            .unwrap_or_else(|| self.recommended_store_id())
122    }
123
124    /// Generates CLI flags from these settings, for external data loaders.
125    pub fn to_cli_args(&self) -> Vec<String> {
126        let Self {
127            application_id,
128            recording_id,
129            opened_store_id,
130            force_store_info: _,
131            entity_path_prefix,
132            timepoint,
133        } = self;
134
135        let mut args = Vec::new();
136
137        if let Some(application_id) = application_id {
138            args.extend(["--application-id".to_owned(), format!("{application_id}")]);
139        }
140        args.extend(["--recording-id".to_owned(), format!("{recording_id}")]);
141
142        if let Some(opened_store_id) = opened_store_id {
143            args.extend([
144                "--opened-application-id".to_owned(),
145                format!("{}", opened_store_id.application_id()),
146            ]);
147
148            args.extend([
149                "--opened-recording-id".to_owned(),
150                format!("{}", opened_store_id.recording_id()),
151            ]);
152        }
153
154        if let Some(entity_path_prefix) = entity_path_prefix {
155            args.extend([
156                "--entity-path-prefix".to_owned(),
157                format!("{entity_path_prefix}"),
158            ]);
159        }
160
161        if let Some(timepoint) = timepoint {
162            if timepoint.is_static() {
163                args.push("--timeless".to_owned()); // for backwards compatibility
164                args.push("--static".to_owned());
165            }
166
167            for (timeline, cell) in timepoint.iter() {
168                match cell.typ() {
169                    re_log_types::TimeType::Sequence => {
170                        args.extend([
171                            "--time_sequence".to_owned(),
172                            format!("{timeline}={}", cell.value),
173                        ]);
174
175                        // for backwards compatibility:
176                        args.extend([
177                            "--sequence".to_owned(),
178                            format!("{timeline}={}", cell.value),
179                        ]);
180                    }
181                    re_log_types::TimeType::DurationNs => {
182                        args.extend([
183                            "--time_duration_nanos".to_owned(),
184                            format!("{timeline}={}", cell.value),
185                        ]);
186
187                        // for backwards compatibility:
188                        args.extend(["--time".to_owned(), format!("{timeline}={}", cell.value)]);
189                    }
190                    re_log_types::TimeType::TimestampNs => {
191                        args.extend([
192                            "--time_duration_nanos".to_owned(),
193                            format!("{timeline}={}", cell.value),
194                        ]);
195
196                        // for backwards compatibility:
197                        args.extend([
198                            "--sequence".to_owned(),
199                            format!("{timeline}={}", cell.value),
200                        ]);
201                    }
202                }
203            }
204        }
205
206        args
207    }
208}
209
/// Name identifying a [`DataLoader`], as returned by [`DataLoader::name`].
pub type DataLoaderName = String;
211
/// A [`DataLoader`] loads data from a file path and/or a file's contents.
///
/// Files can be loaded in 3 different ways:
/// - via the Rerun CLI (`rerun myfile.jpeg`),
/// - using drag-and-drop,
/// - using the open dialog in the Rerun Viewer.
///
/// All these file loading methods support loading a single file, many files at once, or even
/// folders.
/// ⚠ Drag-and-drop of folders does not yet work on the web version of Rerun Viewer ⚠
///
/// We only support loading files from the local filesystem at the moment, and consequently only
/// accept filepaths as input.
/// [There are plans to make this generic over any URI](https://github.com/rerun-io/rerun/issues/4525).
///
/// Rerun comes with a few [`DataLoader`]s by default:
/// - [`RrdLoader`] for [Rerun files].
/// - [`ArchetypeLoader`] for:
///     - [3D models]
///     - [Images]
///     - [Point clouds]
///     - [Text files]
/// - [`DirectoryLoader`] for recursively loading folders.
/// - [`McapLoader`] and [`UrdfDataLoader`], which are also registered as builtin loaders.
/// - `LeRobotDatasetLoader` (native only), which is also registered as a builtin loader.
/// - [`ExternalLoader`], which looks for user-defined data loaders in $PATH.
///
/// ## Registering custom loaders
///
/// Check out our [guide](https://www.rerun.io/docs/reference/data-loaders/overview).
///
/// ## Execution
///
/// **All** known [`DataLoader`]s get called when a user tries to open a file, unconditionally.
/// This gives [`DataLoader`]s maximum flexibility to decide what files they are interested in, as
/// opposed to e.g. only being able to look at files' extensions.
///
/// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible
/// with a [`DataLoaderError::Incompatible`] error.
///
/// Iff all [`DataLoader`]s (including custom and external ones) return with a [`DataLoaderError::Incompatible`]
/// error, the Viewer will show an error message to the user indicating that the file type is not
/// supported.
///
/// On native, [`DataLoader`]s are executed in parallel.
///
/// [Rerun files]: crate::SUPPORTED_RERUN_EXTENSIONS
/// [3D models]: crate::SUPPORTED_MESH_EXTENSIONS
/// [Images]: crate::SUPPORTED_IMAGE_EXTENSIONS
/// [Point clouds]: crate::SUPPORTED_POINT_CLOUD_EXTENSIONS
/// [Text files]: crate::SUPPORTED_TEXT_EXTENSIONS
//
// TODO(#4525): `DataLoader`s should support arbitrary URIs
// TODO(#4527): Web Viewer `?url` parameter should accept anything our `DataLoader`s support
pub trait DataLoader: Send + Sync {
    /// Name of the [`DataLoader`].
    ///
    /// Should be globally unique.
    fn name(&self) -> DataLoaderName;

    /// Loads data from a file on the local filesystem and sends it to `tx`.
    ///
    /// This is generally called when opening files with the Rerun CLI or via the open menu in the
    /// Rerun Viewer on native platforms.
    ///
    /// The passed-in `settings` recommend a shared recording created by the file loading
    /// machinery (see [`DataLoaderSettings::recording_id`]): implementers can decide to use it
    /// or not (e.g. it might make sense to log all images with a similar name in a shared
    /// recording, while an rrd file is already its own recording).
    ///
    /// `path` isn't necessarily a _file_ path, but can be a directory as well: implementers are
    /// free to handle that however they decide.
    ///
    /// ## Error handling
    ///
    /// Most implementers of `load_from_path` are expected to be asynchronous in nature.
    ///
    /// Asynchronous implementers should make sure to fail early (and thus synchronously) when
    /// possible (e.g. didn't even manage to open the file).
    /// Otherwise, they should log errors that happen in an asynchronous context.
    ///
    /// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible
    /// with a [`DataLoaderError::Incompatible`] error.
    #[cfg(not(target_arch = "wasm32"))]
    fn load_from_path(
        &self,
        settings: &DataLoaderSettings,
        path: std::path::PathBuf,
        tx: std::sync::mpsc::Sender<LoadedData>,
    ) -> Result<(), DataLoaderError>;

    /// Loads data from in-memory file contents and sends it to `tx`.
    ///
    /// This is generally called when opening files via drag-and-drop or when using the web viewer.
    ///
    /// The passed-in `settings` recommend a shared recording created by the file loading
    /// machinery (see [`DataLoaderSettings::recording_id`]): implementers can decide to use it
    /// or not (e.g. it might make sense to log all images with a similar name in a shared
    /// recording, while an rrd file is already its own recording).
    ///
    /// The `filepath` of the file is given for informational purposes (e.g. to extract the file's
    /// extension): implementers should _not_ try to read from disk as there likely isn't a
    /// filesystem available to begin with.
    /// `filepath` is guaranteed to be a file path.
    ///
    /// When running on the web (wasm), `filepath` only contains the file name.
    ///
    /// ## Error handling
    ///
    /// Most implementers of `load_from_file_contents` are expected to be asynchronous in nature.
    ///
    /// Asynchronous implementers should make sure to fail early (and thus synchronously) when
    /// possible (e.g. didn't even manage to open the file).
    /// Otherwise, they should log errors that happen in an asynchronous context.
    ///
    /// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible
    /// with a [`DataLoaderError::Incompatible`] error.
    fn load_from_file_contents(
        &self,
        settings: &DataLoaderSettings,
        filepath: std::path::PathBuf,
        contents: std::borrow::Cow<'_, [u8]>,
        tx: std::sync::mpsc::Sender<LoadedData>,
    ) -> Result<(), DataLoaderError>;
}
333
/// Errors that might happen when loading data through a [`DataLoader`].
#[derive(thiserror::Error, Debug)]
pub enum DataLoaderError {
    /// An I/O error. This variant only exists on non-wasm targets.
    #[cfg(not(target_arch = "wasm32"))]
    #[error(transparent)]
    IO(#[from] std::io::Error),

    /// An error coming from the `arrow` crate.
    #[error(transparent)]
    Arrow(#[from] arrow::error::ArrowError),

    /// An error coming from `re_chunk`.
    #[error(transparent)]
    Chunk(#[from] re_chunk::ChunkError),

    /// A decoding error coming from `re_log_encoding`.
    #[error(transparent)]
    Decode(#[from] re_log_encoding::DecodeError),

    /// The loader does not support the file at the given path.
    ///
    /// See [`DataLoaderError::is_incompatible`].
    #[error("No data-loader support for {0:?}")]
    Incompatible(std::path::PathBuf),

    /// An error coming from the `mcap` crate.
    #[error(transparent)]
    Mcap(#[from] ::mcap::McapError),

    /// Any other error, displayed through `re_error::format`.
    #[error("{}", re_error::format(.0))]
    Other(#[from] anyhow::Error),
}
359
360impl DataLoaderError {
361    #[inline]
362    pub fn is_path_not_found(&self) -> bool {
363        match self {
364            #[cfg(not(target_arch = "wasm32"))]
365            Self::IO(err) => err.kind() == std::io::ErrorKind::NotFound,
366            _ => false,
367        }
368    }
369
370    #[inline]
371    pub fn is_incompatible(&self) -> bool {
372        matches!(self, Self::Incompatible { .. })
373    }
374}
375
/// What [`DataLoader`]s load.
///
/// This makes it trivial for [`DataLoader`]s to build the data in whatever form is
/// most convenient for them, whether it is raw components, arrow chunks or even
/// full-on [`LogMsg`]s.
///
/// The first field of every variant is the name of the [`DataLoader`] that produced the data
/// (see [`LoadedData::data_loader_name`]).
pub enum LoadedData {
    /// A [`Chunk`], together with the store it should be logged to.
    Chunk(DataLoaderName, re_log_types::StoreId, Chunk),

    /// An [`ArrowMsg`], together with the store it should be logged to.
    ArrowMsg(DataLoaderName, re_log_types::StoreId, ArrowMsg),

    /// An already fully formed [`LogMsg`].
    LogMsg(DataLoaderName, LogMsg),
}
386
387impl LoadedData {
388    /// Returns the name of the [`DataLoader`] that generated this data.
389    #[inline]
390    pub fn data_loader_name(&self) -> &DataLoaderName {
391        match self {
392            Self::Chunk(name, ..) | Self::ArrowMsg(name, ..) | Self::LogMsg(name, ..) => name,
393        }
394    }
395
396    /// Pack the data into a [`LogMsg`].
397    #[inline]
398    pub fn into_log_msg(self) -> ChunkResult<LogMsg> {
399        match self {
400            Self::Chunk(_name, store_id, chunk) => {
401                Ok(LogMsg::ArrowMsg(store_id, chunk.to_arrow_msg()?))
402            }
403
404            Self::ArrowMsg(_name, store_id, msg) => Ok(LogMsg::ArrowMsg(store_id, msg)),
405
406            Self::LogMsg(_name, msg) => Ok(msg),
407        }
408    }
409}
410
411// ----------------------------------------------------------------------------
412
/// Keeps track of all builtin [`DataLoader`]s.
///
/// Lazily initialized on first access, which happens through [`iter_loaders`].
/// [`iter_loaders`] yields the loaders in the order they are listed here.
static BUILTIN_LOADERS: LazyLock<Vec<Arc<dyn DataLoader>>> = LazyLock::new(|| {
    vec![
        // The cast on the first element fixes the element type of the whole `vec!`.
        Arc::new(RrdLoader) as Arc<dyn DataLoader>,
        Arc::new(ArchetypeLoader),
        Arc::new(DirectoryLoader),
        Arc::new(McapLoader::default()),
        // Not compiled for wasm (the module itself is gated the same way).
        #[cfg(not(target_arch = "wasm32"))]
        Arc::new(LeRobotDatasetLoader),
        // Not compiled for wasm (the module itself is gated the same way).
        #[cfg(not(target_arch = "wasm32"))]
        Arc::new(ExternalLoader),
        Arc::new(UrdfDataLoader),
    ]
});
429
430/// Iterator over all registered [`DataLoader`]s.
431#[inline]
432pub fn iter_loaders() -> impl Iterator<Item = Arc<dyn DataLoader>> {
433    BUILTIN_LOADERS
434        .clone()
435        .into_iter()
436        .chain(CUSTOM_LOADERS.read().clone())
437}
438
/// Keeps track of all custom [`DataLoader`]s.
///
/// Starts out empty; use [`register_custom_data_loader`] to add new loaders.
/// Guarded by a read-write lock: registration takes the write lock, [`iter_loaders`] the read lock.
static CUSTOM_LOADERS: LazyLock<parking_lot::RwLock<Vec<Arc<dyn DataLoader>>>> =
    LazyLock::new(parking_lot::RwLock::default);
444
445/// Register a custom [`DataLoader`].
446///
447/// Any time the Rerun Viewer opens a file or directory, this custom loader will be notified.
448/// Refer to [`DataLoader`]'s documentation for more information.
449#[inline]
450pub fn register_custom_data_loader(loader: impl DataLoader + 'static) {
451    CUSTOM_LOADERS.write().push(Arc::new(loader));
452}
453
454// ----------------------------------------------------------------------------
455
/// Returns the ASCII-lowercased extension of `path`, without the leading period.
///
/// Returns an empty string if the path has no extension. Non-UTF-8 sequences are converted lossily.
#[inline]
pub(crate) fn extension(path: &std::path::Path) -> String {
    match path.extension() {
        Some(raw_ext) => raw_ext
            .to_ascii_lowercase()
            .to_string_lossy()
            .into_owned(),
        None => String::new(),
    }
}
465
466// ----------------------------------------------------------------------------
467
/// Image file extensions (lowercase, without the leading period).
// …given that all feature flags are turned on for the `image` crate.
pub const SUPPORTED_IMAGE_EXTENSIONS: &[&str] = &[
    "avif", "bmp", "dds", "exr", "farbfeld", "ff", "gif", "hdr", "ico", "jpeg", "jpg", "pam",
    "pbm", "pgm", "png", "ppm", "tga", "tif", "tiff", "webp",
];

/// Depth image file extensions.
///
/// Note that `png` is also listed in [`SUPPORTED_IMAGE_EXTENSIONS`], so
/// [`supported_extensions`] yields it twice.
pub const SUPPORTED_DEPTH_IMAGE_EXTENSIONS: &[&str] = &["rvl", "png"];

/// Video file extensions.
pub const SUPPORTED_VIDEO_EXTENSIONS: &[&str] = &["mp4"];

/// 3D model (mesh) file extensions.
pub const SUPPORTED_MESH_EXTENSIONS: &[&str] = &["glb", "gltf", "obj", "stl", "dae"];

/// Point cloud file extensions.
// TODO(#4532): `.ply` data loader should support 2D point cloud & meshes
pub const SUPPORTED_POINT_CLOUD_EXTENSIONS: &[&str] = &["ply"];

/// Extensions of Rerun's own file formats.
pub const SUPPORTED_RERUN_EXTENSIONS: &[&str] = &["rbl", "rrd"];

/// 3rd party formats with built-in support.
pub const SUPPORTED_THIRD_PARTY_FORMATS: &[&str] = &["mcap", "urdf"];

/// Text file extensions.
// TODO(#4555): Add catch-all builtin `DataLoader` for text files
pub const SUPPORTED_TEXT_EXTENSIONS: &[&str] = &["txt", "md"];
490
491/// All file extension supported by our builtin [`DataLoader`]s.
492pub fn supported_extensions() -> impl Iterator<Item = &'static str> {
493    SUPPORTED_RERUN_EXTENSIONS
494        .iter()
495        .chain(SUPPORTED_THIRD_PARTY_FORMATS)
496        .chain(SUPPORTED_IMAGE_EXTENSIONS)
497        .chain(SUPPORTED_DEPTH_IMAGE_EXTENSIONS)
498        .chain(SUPPORTED_VIDEO_EXTENSIONS)
499        .chain(SUPPORTED_MESH_EXTENSIONS)
500        .chain(SUPPORTED_POINT_CLOUD_EXTENSIONS)
501        .chain(SUPPORTED_TEXT_EXTENSIONS)
502        .copied()
503}
504
505/// Is this a supported file extension by any of our builtin [`DataLoader`]s?
506pub fn is_supported_file_extension(extension: &str) -> bool {
507    debug_assert!(
508        !extension.starts_with('.'),
509        "Expected extension without period, but got {extension:?}"
510    );
511    let extension = extension.to_lowercase();
512    supported_extensions().any(|ext| ext == extension)
513}
514
/// Spot-checks one extension from several of the supported groups.
#[test]
fn test_supported_extensions() {
    for ext in ["rrd", "mcap", "png", "urdf"] {
        assert!(
            is_supported_file_extension(ext),
            "expected {ext:?} to be a supported extension"
        );
    }
}