re_data_loader/
lib.rs

1//! Handles loading of Rerun data from file using data loader plugins.
2
3use std::sync::Arc;
4
5use once_cell::sync::Lazy;
6
7use re_chunk::{Chunk, ChunkResult};
8use re_log_types::{ArrowMsg, EntityPath, LogMsg, TimePoint};
9
10// ----------------------------------------------------------------------------
11
12mod load_file;
13mod loader_archetype;
14mod loader_directory;
15mod loader_rrd;
16mod loader_urdf;
17
18#[cfg(not(target_arch = "wasm32"))]
19pub mod lerobot;
20
21// This loader currently only works when loading the entire dataset directory, and we cannot do that on web yet.
22#[cfg(not(target_arch = "wasm32"))]
23pub mod loader_lerobot;
24
25#[cfg(not(target_arch = "wasm32"))]
26mod loader_external;
27
28pub use self::{
29    load_file::load_from_file_contents, loader_archetype::ArchetypeLoader,
30    loader_directory::DirectoryLoader, loader_rrd::RrdLoader, loader_urdf::UrdfDataLoader,
31    loader_urdf::UrdfTree,
32};
33
34#[cfg(not(target_arch = "wasm32"))]
35pub use self::{
36    load_file::load_from_path,
37    loader_external::{
38        EXTERNAL_DATA_LOADER_INCOMPATIBLE_EXIT_CODE, EXTERNAL_DATA_LOADER_PREFIX, ExternalLoader,
39        iter_external_loaders,
40    },
41    loader_lerobot::LeRobotDatasetLoader,
42};
43
/// Re-exports of third-party crates.
///
/// Currently this only re-exports the `urdf_rs` crate.
pub mod external {
    pub use urdf_rs;
}
47
48// ----------------------------------------------------------------------------
49
/// Recommended settings for the [`DataLoader`].
///
/// The loader is free to ignore some or all of these.
///
/// External [`DataLoader`]s will be passed the following CLI parameters:
/// * `--application-id <application_id>` (if set)
/// * `--opened-application-id <opened_application_id>` (if set)
/// * `--recording-id <store_id>`
/// * `--opened-recording-id <opened_store_id>` (if set)
/// * `--entity-path-prefix <entity_path_prefix>` (if set)
/// * `--static` (if `timepoint` is set to the timeless timepoint)
/// * `--timeless` \[deprecated\] (if `timepoint` is set to the timeless timepoint)
/// * `--time_sequence <timeline1>=<seq1> <timeline2>=<seq2> ...` (if `timepoint` contains sequence data)
/// * `--sequence <timeline>=<seq>` \[deprecated\] (legacy alias, emitted alongside `--time_sequence`)
/// * `--time_duration_nanos <timeline1>=<duration1> <timeline2>=<duration2> ...` (if `timepoint` contains duration data) in nanos
/// * `--time <timeline>=<duration>` \[deprecated\] (legacy alias, emitted alongside `--time_duration_nanos`)
/// * `--time_timestamp_nanos <timeline1>=<timestamp1> <timeline2>=<timestamp2> ...` (if `timepoint` contains timestamp data) in nanos since epoch
///
/// See [`DataLoaderSettings::to_cli_args`] for the code that generates these flags.
#[derive(Debug, Clone)]
pub struct DataLoaderSettings {
    /// The recommended [`re_log_types::ApplicationId`] to log the data to, based on the surrounding context.
    pub application_id: Option<re_log_types::ApplicationId>,

    /// The [`re_log_types::ApplicationId`] that is currently opened in the viewer, if any.
    pub opened_application_id: Option<re_log_types::ApplicationId>,

    /// The recommended [`re_log_types::StoreId`] to log the data to, based on the surrounding context.
    ///
    /// Log data to this recording if you want it to appear in a new recording shared by all
    /// data-loaders for the current loading session.
    pub store_id: re_log_types::StoreId,

    /// The [`re_log_types::StoreId`] that is currently opened in the viewer, if any.
    pub opened_store_id: Option<re_log_types::StoreId>,

    /// Whether `SetStoreInfo`s should be sent, regardless of the surrounding context.
    ///
    /// Only useful when creating a recording just-in-time directly in the viewer (which is what
    /// happens when importing things into the welcome screen).
    ///
    /// This setting is not forwarded to external loaders: no CLI flag is generated for it.
    pub force_store_info: bool,

    /// What should the logged entity paths be prefixed with?
    pub entity_path_prefix: Option<EntityPath>,

    /// At what time(s) should the data be logged to?
    pub timepoint: Option<TimePoint>,
}
94
95impl DataLoaderSettings {
96    #[inline]
97    pub fn recommended(store_id: impl Into<re_log_types::StoreId>) -> Self {
98        Self {
99            application_id: Default::default(),
100            opened_application_id: Default::default(),
101            store_id: store_id.into(),
102            opened_store_id: Default::default(),
103            force_store_info: false,
104            entity_path_prefix: Default::default(),
105            timepoint: Default::default(),
106        }
107    }
108
109    /// Generates CLI flags from these settings, for external data loaders.
110    pub fn to_cli_args(&self) -> Vec<String> {
111        let Self {
112            application_id,
113            opened_application_id,
114            store_id,
115            opened_store_id,
116            force_store_info: _,
117            entity_path_prefix,
118            timepoint,
119        } = self;
120
121        let mut args = Vec::new();
122
123        if let Some(application_id) = application_id {
124            args.extend(["--application-id".to_owned(), format!("{application_id}")]);
125        }
126        args.extend(["--recording-id".to_owned(), format!("{store_id}")]);
127
128        if let Some(opened_application_id) = opened_application_id {
129            args.extend([
130                "--opened-application-id".to_owned(),
131                format!("{opened_application_id}"),
132            ]);
133        }
134        if let Some(opened_store_id) = opened_store_id {
135            args.extend([
136                "--opened-recording-id".to_owned(),
137                format!("{opened_store_id}"),
138            ]);
139        }
140
141        if let Some(entity_path_prefix) = entity_path_prefix {
142            args.extend([
143                "--entity-path-prefix".to_owned(),
144                format!("{entity_path_prefix}"),
145            ]);
146        }
147
148        if let Some(timepoint) = timepoint {
149            if timepoint.is_static() {
150                args.push("--timeless".to_owned()); // for backwards compatibility
151                args.push("--static".to_owned());
152            }
153
154            for (timeline, cell) in timepoint.iter() {
155                match cell.typ() {
156                    re_log_types::TimeType::Sequence => {
157                        args.extend([
158                            "--time_sequence".to_owned(),
159                            format!("{timeline}={}", cell.value),
160                        ]);
161
162                        // for backwards compatibility:
163                        args.extend([
164                            "--sequence".to_owned(),
165                            format!("{timeline}={}", cell.value),
166                        ]);
167                    }
168                    re_log_types::TimeType::DurationNs => {
169                        args.extend([
170                            "--time_duration_nanos".to_owned(),
171                            format!("{timeline}={}", cell.value),
172                        ]);
173
174                        // for backwards compatibility:
175                        args.extend(["--time".to_owned(), format!("{timeline}={}", cell.value)]);
176                    }
177                    re_log_types::TimeType::TimestampNs => {
178                        args.extend([
179                            "--time_duration_nanos".to_owned(),
180                            format!("{timeline}={}", cell.value),
181                        ]);
182
183                        // for backwards compatibility:
184                        args.extend([
185                            "--sequence".to_owned(),
186                            format!("{timeline}={}", cell.value),
187                        ]);
188                    }
189                }
190            }
191        }
192
193        args
194    }
195}
196
/// The name of a [`DataLoader`], as returned by [`DataLoader::name`].
pub type DataLoaderName = String;
198
/// A [`DataLoader`] loads data from a file path and/or a file's contents.
///
/// Files can be loaded in 3 different ways:
/// - via the Rerun CLI (`rerun myfile.jpeg`),
/// - using drag-and-drop,
/// - using the open dialog in the Rerun Viewer.
///
/// All these file loading methods support loading a single file, many files at once, or even
/// folders.
/// ⚠ Drag-and-drop of folders does not yet work on the web version of Rerun Viewer ⚠
///
/// We only support loading files from the local filesystem at the moment, and consequently only
/// accept filepaths as input.
/// [There are plans to make this generic over any URI](https://github.com/rerun-io/rerun/issues/4525).
///
/// Rerun comes with a few [`DataLoader`]s by default:
/// - [`RrdLoader`] for [Rerun files].
/// - [`ArchetypeLoader`] for:
///     - [3D models]
///     - [Images]
///     - [Point clouds]
///     - [Text files]
/// - [`DirectoryLoader`] for recursively loading folders.
/// - [`UrdfDataLoader`].
/// - [`LeRobotDatasetLoader`] for dataset directories (native only).
/// - [`ExternalLoader`], which looks for user-defined data loaders in $PATH (native only).
///
/// ## Registering custom loaders
///
/// Check out our [guide](https://www.rerun.io/docs/reference/data-loaders/overview).
///
/// ## Execution
///
/// **All** known [`DataLoader`]s get called when a user tries to open a file, unconditionally.
/// This gives [`DataLoader`]s maximum flexibility to decide what files they are interested in, as
/// opposed to e.g. only being able to look at files' extensions.
///
/// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible
/// with a [`DataLoaderError::Incompatible`] error.
///
/// Iff all [`DataLoader`]s (including custom and external ones) return with a [`DataLoaderError::Incompatible`]
/// error, the Viewer will show an error message to the user indicating that the file type is not
/// supported.
///
/// On native, [`DataLoader`]s are executed in parallel.
///
/// [Rerun files]: crate::SUPPORTED_RERUN_EXTENSIONS
/// [3D models]: crate::SUPPORTED_MESH_EXTENSIONS
/// [Images]: crate::SUPPORTED_IMAGE_EXTENSIONS
/// [Point clouds]: crate::SUPPORTED_POINT_CLOUD_EXTENSIONS
/// [Text files]: crate::SUPPORTED_TEXT_EXTENSIONS
//
// TODO(#4525): `DataLoader`s should support arbitrary URIs
// TODO(#4527): Web Viewer `?url` parameter should accept anything our `DataLoader`s support
pub trait DataLoader: Send + Sync {
    /// Name of the [`DataLoader`].
    ///
    /// Should be globally unique.
    fn name(&self) -> DataLoaderName;

    /// Loads data from a file on the local filesystem and sends it to `tx`.
    ///
    /// This is generally called when opening files with the Rerun CLI or via the open menu in the
    /// Rerun Viewer on native platforms.
    ///
    /// The `store_id` passed in via `settings` is a shared recording created by the file loading
    /// machinery: implementers can decide to use it or not (e.g. it might make sense to log all
    /// images with a similar name in a shared recording, while an rrd file is already its own
    /// recording).
    ///
    /// `path` isn't necessarily a _file_ path, but can be a directory as well: implementers are
    /// free to handle that however they decide.
    ///
    /// ## Error handling
    ///
    /// Most implementers of `load_from_path` are expected to be asynchronous in nature.
    ///
    /// Asynchronous implementers should make sure to fail early (and thus synchronously) when
    /// possible (e.g. didn't even manage to open the file).
    /// Otherwise, they should log errors that happen in an asynchronous context.
    ///
    /// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible
    /// with a [`DataLoaderError::Incompatible`] error.
    #[cfg(not(target_arch = "wasm32"))]
    fn load_from_path(
        &self,
        settings: &DataLoaderSettings,
        path: std::path::PathBuf,
        tx: std::sync::mpsc::Sender<LoadedData>,
    ) -> Result<(), DataLoaderError>;

    /// Loads data from in-memory file contents and sends it to `tx`.
    ///
    /// This is generally called when opening files via drag-and-drop or when using the web viewer.
    ///
    /// The `store_id` passed in via `settings` is a shared recording created by the file loading
    /// machinery: implementers can decide to use it or not (e.g. it might make sense to log all
    /// images with a similar name in a shared recording, while an rrd file is already its own
    /// recording).
    ///
    /// The `filepath` of the file is given for informational purposes (e.g. to extract the file's
    /// extension): implementers should _not_ try to read from disk as there likely isn't a
    /// filesystem available to begin with.
    /// `filepath` is guaranteed to be a file path.
    ///
    /// When running on the web (wasm), `filepath` only contains the file name.
    ///
    /// ## Error handling
    ///
    /// Most implementers of `load_from_file_contents` are expected to be asynchronous in nature.
    ///
    /// Asynchronous implementers should make sure to fail early (and thus synchronously) when
    /// possible (e.g. didn't even manage to open the file).
    /// Otherwise, they should log errors that happen in an asynchronous context.
    ///
    /// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible
    /// with a [`DataLoaderError::Incompatible`] error.
    fn load_from_file_contents(
        &self,
        settings: &DataLoaderSettings,
        filepath: std::path::PathBuf,
        contents: std::borrow::Cow<'_, [u8]>,
        tx: std::sync::mpsc::Sender<LoadedData>,
    ) -> Result<(), DataLoaderError>;
}
320
/// Errors that might happen when loading data through a [`DataLoader`].
#[derive(thiserror::Error, Debug)]
pub enum DataLoaderError {
    /// An I/O error. This variant does not exist on `wasm32`.
    #[cfg(not(target_arch = "wasm32"))]
    #[error(transparent)]
    IO(#[from] std::io::Error),

    /// A [`re_chunk::ChunkError`], forwarded as-is.
    #[error(transparent)]
    Arrow(#[from] re_chunk::ChunkError),

    /// A [`re_log_encoding::decoder::DecodeError`], forwarded as-is.
    #[error(transparent)]
    Decode(#[from] re_log_encoding::decoder::DecodeError),

    /// The [`DataLoader`] has no interest in the file at the given path.
    ///
    /// See [`DataLoader`]'s documentation for how this variant is used to decide whether a
    /// file type is supported at all.
    #[error("No data-loader support for {0:?}")]
    Incompatible(std::path::PathBuf),

    /// Any other error; displayed through `re_error::format`.
    #[error("{}", re_error::format(.0))]
    Other(#[from] anyhow::Error),
}
340
341impl DataLoaderError {
342    #[inline]
343    pub fn is_path_not_found(&self) -> bool {
344        match self {
345            #[cfg(not(target_arch = "wasm32"))]
346            Self::IO(err) => err.kind() == std::io::ErrorKind::NotFound,
347            _ => false,
348        }
349    }
350
351    #[inline]
352    pub fn is_incompatible(&self) -> bool {
353        matches!(self, Self::Incompatible { .. })
354    }
355}
356
/// What [`DataLoader`]s load.
///
/// This makes it trivial for [`DataLoader`]s to build the data in whatever form is
/// most convenient for them, whether it is raw components, arrow chunks or even
/// full-on [`LogMsg`]s.
///
/// In every variant, the first field is the name of the [`DataLoader`] that produced the data.
pub enum LoadedData {
    /// A [`Chunk`], together with the store it should be logged to.
    Chunk(DataLoaderName, re_log_types::StoreId, Chunk),

    /// An [`ArrowMsg`], together with the store it should be logged to.
    ArrowMsg(DataLoaderName, re_log_types::StoreId, ArrowMsg),

    /// An already complete [`LogMsg`].
    LogMsg(DataLoaderName, LogMsg),
}
367
368impl LoadedData {
369    /// Returns the name of the [`DataLoader`] that generated this data.
370    #[inline]
371    pub fn data_loader_name(&self) -> &DataLoaderName {
372        match self {
373            Self::Chunk(name, ..) | Self::ArrowMsg(name, ..) | Self::LogMsg(name, ..) => name,
374        }
375    }
376
377    /// Pack the data into a [`LogMsg`].
378    #[inline]
379    pub fn into_log_msg(self) -> ChunkResult<LogMsg> {
380        match self {
381            Self::Chunk(_name, store_id, chunk) => {
382                Ok(LogMsg::ArrowMsg(store_id, chunk.to_arrow_msg()?))
383            }
384
385            Self::ArrowMsg(_name, store_id, msg) => Ok(LogMsg::ArrowMsg(store_id, msg)),
386
387            Self::LogMsg(_name, msg) => Ok(msg),
388        }
389    }
390}
391
392// ----------------------------------------------------------------------------
393
/// Keeps track of all builtin [`DataLoader`]s.
///
/// Lazily initialized on first access, i.e. the first time [`iter_loaders`] is called.
///
/// Some loaders are only registered on native targets (see the `cfg` attributes below).
static BUILTIN_LOADERS: Lazy<Vec<Arc<dyn DataLoader>>> = Lazy::new(|| {
    vec![
        // The cast on the first element fixes the element type to the trait object, so the
        // remaining elements coerce without needing their own cast.
        Arc::new(RrdLoader) as Arc<dyn DataLoader>,
        Arc::new(ArchetypeLoader),
        Arc::new(DirectoryLoader),
        #[cfg(not(target_arch = "wasm32"))]
        Arc::new(LeRobotDatasetLoader),
        #[cfg(not(target_arch = "wasm32"))]
        Arc::new(ExternalLoader),
        Arc::new(UrdfDataLoader),
    ]
});
409
410/// Iterator over all registered [`DataLoader`]s.
411#[inline]
412pub fn iter_loaders() -> impl Iterator<Item = Arc<dyn DataLoader>> {
413    BUILTIN_LOADERS
414        .clone()
415        .into_iter()
416        .chain(CUSTOM_LOADERS.read().clone())
417}
418
/// Keeps track of all custom [`DataLoader`]s.
///
/// Starts out empty. Use [`register_custom_data_loader`] to add new loaders; [`iter_loaders`]
/// reads from it.
static CUSTOM_LOADERS: Lazy<parking_lot::RwLock<Vec<Arc<dyn DataLoader>>>> =
    Lazy::new(parking_lot::RwLock::default);
424
425/// Register a custom [`DataLoader`].
426///
427/// Any time the Rerun Viewer opens a file or directory, this custom loader will be notified.
428/// Refer to [`DataLoader`]'s documentation for more information.
429#[inline]
430pub fn register_custom_data_loader(loader: impl DataLoader + 'static) {
431    CUSTOM_LOADERS.write().push(Arc::new(loader));
432}
433
434// ----------------------------------------------------------------------------
435
/// Returns the ASCII-lowercased extension of `path`.
///
/// Empty string if no extension. Non-UTF-8 parts are converted lossily.
#[inline]
pub(crate) fn extension(path: &std::path::Path) -> String {
    match path.extension() {
        // ASCII lowercasing never touches non-ASCII bytes, so it makes no difference whether
        // it happens before or after the lossy conversion.
        Some(ext) => ext.to_string_lossy().to_ascii_lowercase(),
        None => String::new(),
    }
}
445
446// ----------------------------------------------------------------------------
447
/// Image file extensions handled by the builtin loaders.
//
// …given that all feature flags are turned on for the `image` crate.
pub const SUPPORTED_IMAGE_EXTENSIONS: &[&str] = &[
    "avif", "bmp", "dds", "exr", "farbfeld", "ff", "gif", "hdr", "ico", "jpeg", "jpg", "pam",
    "pbm", "pgm", "png", "ppm", "tga", "tif", "tiff", "webp",
];

/// Video file extensions handled by the builtin loaders.
pub const SUPPORTED_VIDEO_EXTENSIONS: &[&str] = &["mp4"];

/// 3D mesh file extensions handled by the builtin loaders.
pub const SUPPORTED_MESH_EXTENSIONS: &[&str] = &["glb", "gltf", "obj", "stl"];

/// Point cloud file extensions handled by the builtin loaders.
//
// TODO(#4532): `.ply` data loader should support 2D point cloud & meshes
pub const SUPPORTED_POINT_CLOUD_EXTENSIONS: &[&str] = &["ply"];

/// Rerun's own file extensions.
pub const SUPPORTED_RERUN_EXTENSIONS: &[&str] = &["rbl", "rrd"];

/// Text file extensions handled by the builtin loaders.
//
// TODO(#4555): Add catch-all builtin `DataLoader` for text files
pub const SUPPORTED_TEXT_EXTENSIONS: &[&str] = &["txt", "md"];

/// All file extensions supported by our builtin [`DataLoader`]s.
///
/// Yields the Rerun extensions first, followed by image, video, mesh, point cloud and text
/// extensions, each group in declaration order.
pub fn supported_extensions() -> impl Iterator<Item = &'static str> {
    const EXTENSION_GROUPS: &[&[&str]] = &[
        SUPPORTED_RERUN_EXTENSIONS,
        SUPPORTED_IMAGE_EXTENSIONS,
        SUPPORTED_VIDEO_EXTENSIONS,
        SUPPORTED_MESH_EXTENSIONS,
        SUPPORTED_POINT_CLOUD_EXTENSIONS,
        SUPPORTED_TEXT_EXTENSIONS,
    ];

    EXTENSION_GROUPS
        .iter()
        .flat_map(|group| group.iter().copied())
}

/// Is this a supported file extension by any of our builtin [`DataLoader`]s?
///
/// The comparison is exact (case-sensitive) and `extension` must not include the leading dot.
pub fn is_supported_file_extension(extension: &str) -> bool {
    supported_extensions().any(|known| known == extension)
}