re_data_loader/
lib.rs

1//! Handles loading of Rerun data from file using data loader plugins.
2
3use std::sync::Arc;
4
5use once_cell::sync::Lazy;
6
7use re_chunk::{Chunk, ChunkResult};
8use re_log_types::{ArrowMsg, EntityPath, LogMsg, TimePoint};
9
10// ----------------------------------------------------------------------------
11
12mod load_file;
13mod loader_archetype;
14mod loader_directory;
15mod loader_rrd;
16
17#[cfg(not(target_arch = "wasm32"))]
18pub mod lerobot;
19
20// This loader currently only works when loading the entire dataset directory, and we cannot do that on web yet.
21#[cfg(not(target_arch = "wasm32"))]
22pub mod loader_lerobot;
23
24#[cfg(not(target_arch = "wasm32"))]
25mod loader_external;
26
27pub use self::{
28    load_file::load_from_file_contents, loader_archetype::ArchetypeLoader,
29    loader_directory::DirectoryLoader, loader_rrd::RrdLoader,
30};
31
32#[cfg(not(target_arch = "wasm32"))]
33pub use self::{
34    load_file::load_from_path,
35    loader_external::{
36        iter_external_loaders, ExternalLoader, EXTERNAL_DATA_LOADER_INCOMPATIBLE_EXIT_CODE,
37        EXTERNAL_DATA_LOADER_PREFIX,
38    },
39    loader_lerobot::LeRobotDatasetLoader,
40};
41
42// ----------------------------------------------------------------------------
43
/// Recommended settings for the [`DataLoader`].
///
/// The loader is free to ignore some or all of these.
///
/// External [`DataLoader`]s will be passed the following CLI parameters
/// (see [`DataLoaderSettings::to_cli_args`]):
/// * `--application-id <application_id>` (if set)
/// * `--opened-application-id <opened_application_id>` (if set)
/// * `--recording-id <store_id>`
/// * `--opened-recording-id <opened_store_id>` (if set)
/// * `--entity-path-prefix <entity_path_prefix>` (if set)
/// * `--static` (if `timepoint` is set to the timeless timepoint)
/// * `--timeless` \[deprecated\] (if `timepoint` is set to the timeless timepoint)
/// * `--time_sequence <timeline1>=<seq1> <timeline2>=<seq2> ...` (if `timepoint` contains sequence data)
/// * `--time_duration_nanos <timeline1>=<duration1> <timeline2>=<duration2> ...` (if `timepoint` contains duration data) in nanos
/// * `--time_timestamp_nanos <timeline1>=<timestamp1> <timeline2>=<timestamp2> ...` (if `timepoint` contains timestamp data) in nanos since epoch
///
/// `force_store_info` has no CLI counterpart and is therefore not forwarded to external loaders.
#[derive(Debug, Clone)]
pub struct DataLoaderSettings {
    /// The recommended [`re_log_types::ApplicationId`] to log the data to, based on the surrounding context.
    ///
    /// `None` means there is no recommendation.
    pub application_id: Option<re_log_types::ApplicationId>,

    /// The [`re_log_types::ApplicationId`] that is currently opened in the viewer, if any.
    pub opened_application_id: Option<re_log_types::ApplicationId>,

    /// The recommended [`re_log_types::StoreId`] to log the data to, based on the surrounding context.
    ///
    /// Log data to this recording if you want it to appear in a new recording shared by all
    /// data-loaders for the current loading session.
    pub store_id: re_log_types::StoreId,

    /// The [`re_log_types::StoreId`] that is currently opened in the viewer, if any.
    pub opened_store_id: Option<re_log_types::StoreId>,

    /// Whether `SetStoreInfo`s should be sent, regardless of the surrounding context.
    ///
    /// Only useful when creating a recording just-in-time directly in the viewer (which is what
    /// happens when importing things into the welcome screen).
    pub force_store_info: bool,

    /// What should the logged entity paths be prefixed with?
    ///
    /// `None` means no prefix is requested.
    pub entity_path_prefix: Option<EntityPath>,

    /// At what time(s) should the data be logged to?
    ///
    /// `None` means no timepoint is requested.
    pub timepoint: Option<TimePoint>,
}
88
89impl DataLoaderSettings {
90    #[inline]
91    pub fn recommended(store_id: impl Into<re_log_types::StoreId>) -> Self {
92        Self {
93            application_id: Default::default(),
94            opened_application_id: Default::default(),
95            store_id: store_id.into(),
96            opened_store_id: Default::default(),
97            force_store_info: false,
98            entity_path_prefix: Default::default(),
99            timepoint: Default::default(),
100        }
101    }
102
103    /// Generates CLI flags from these settings, for external data loaders.
104    pub fn to_cli_args(&self) -> Vec<String> {
105        let Self {
106            application_id,
107            opened_application_id,
108            store_id,
109            opened_store_id,
110            force_store_info: _,
111            entity_path_prefix,
112            timepoint,
113        } = self;
114
115        let mut args = Vec::new();
116
117        if let Some(application_id) = application_id {
118            args.extend(["--application-id".to_owned(), format!("{application_id}")]);
119        }
120        args.extend(["--recording-id".to_owned(), format!("{store_id}")]);
121
122        if let Some(opened_application_id) = opened_application_id {
123            args.extend([
124                "--opened-application-id".to_owned(),
125                format!("{opened_application_id}"),
126            ]);
127        }
128        if let Some(opened_store_id) = opened_store_id {
129            args.extend([
130                "--opened-recording-id".to_owned(),
131                format!("{opened_store_id}"),
132            ]);
133        }
134
135        if let Some(entity_path_prefix) = entity_path_prefix {
136            args.extend([
137                "--entity-path-prefix".to_owned(),
138                format!("{entity_path_prefix}"),
139            ]);
140        }
141
142        if let Some(timepoint) = timepoint {
143            if timepoint.is_static() {
144                args.push("--timeless".to_owned()); // for backwards compatibility
145                args.push("--static".to_owned());
146            }
147
148            for (timeline, cell) in timepoint.iter() {
149                match cell.typ() {
150                    re_log_types::TimeType::Sequence => {
151                        args.extend([
152                            "--time_sequence".to_owned(),
153                            format!("{timeline}={}", cell.value),
154                        ]);
155
156                        // for backwards compatibility:
157                        args.extend([
158                            "--sequence".to_owned(),
159                            format!("{timeline}={}", cell.value),
160                        ]);
161                    }
162                    re_log_types::TimeType::DurationNs => {
163                        args.extend([
164                            "--time_duration_nanos".to_owned(),
165                            format!("{timeline}={}", cell.value),
166                        ]);
167
168                        // for backwards compatibility:
169                        args.extend(["--time".to_owned(), format!("{timeline}={}", cell.value)]);
170                    }
171                    re_log_types::TimeType::TimestampNs => {
172                        args.extend([
173                            "--time_duration_nanos".to_owned(),
174                            format!("{timeline}={}", cell.value),
175                        ]);
176
177                        // for backwards compatibility:
178                        args.extend([
179                            "--sequence".to_owned(),
180                            format!("{timeline}={}", cell.value),
181                        ]);
182                    }
183                }
184            }
185        }
186
187        args
188    }
189}
190
/// Name of a [`DataLoader`], as returned by [`DataLoader::name`].
pub type DataLoaderName = String;
192
/// A [`DataLoader`] loads data from a file path and/or a file's contents.
///
/// Files can be loaded in 3 different ways:
/// - via the Rerun CLI (`rerun myfile.jpeg`),
/// - using drag-and-drop,
/// - using the open dialog in the Rerun Viewer.
///
/// All these file loading methods support loading a single file, many files at once, or even
/// folders.
/// ⚠ Drag-and-drop of folders does not yet work on the web version of Rerun Viewer ⚠
///
/// We only support loading files from the local filesystem at the moment, and consequently only
/// accept filepaths as input.
/// [There are plans to make this generic over any URI](https://github.com/rerun-io/rerun/issues/4525).
///
/// Rerun comes with a few [`DataLoader`]s by default:
/// - [`RrdLoader`] for [Rerun files].
/// - [`ArchetypeLoader`] for:
///     - [3D models]
///     - [Images]
///     - [Point clouds]
///     - [Text files]
/// - [`DirectoryLoader`] for recursively loading folders.
/// - `LeRobotDatasetLoader` for LeRobot dataset directories (native only).
/// - [`ExternalLoader`], which looks for user-defined data loaders in $PATH (native only).
///
/// ## Registering custom loaders
///
/// Check out our [guide](https://www.rerun.io/docs/reference/data-loaders/overview).
///
/// ## Execution
///
/// **All** known [`DataLoader`]s get called when a user tries to open a file, unconditionally.
/// This gives [`DataLoader`]s maximum flexibility to decide what files they are interested in, as
/// opposed to e.g. only being able to look at files' extensions.
///
/// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible
/// with a [`DataLoaderError::Incompatible`] error.
///
/// Iff all [`DataLoader`]s (including custom and external ones) return with a [`DataLoaderError::Incompatible`]
/// error, the Viewer will show an error message to the user indicating that the file type is not
/// supported.
///
/// On native, [`DataLoader`]s are executed in parallel.
///
/// [Rerun files]: crate::SUPPORTED_RERUN_EXTENSIONS
/// [3D models]: crate::SUPPORTED_MESH_EXTENSIONS
/// [Images]: crate::SUPPORTED_IMAGE_EXTENSIONS
/// [Point clouds]: crate::SUPPORTED_POINT_CLOUD_EXTENSIONS
/// [Text files]: crate::SUPPORTED_TEXT_EXTENSIONS
//
// TODO(#4525): `DataLoader`s should support arbitrary URIs
// TODO(#4527): Web Viewer `?url` parameter should accept anything our `DataLoader`s support
pub trait DataLoader: Send + Sync {
    /// Name of the [`DataLoader`].
    ///
    /// Should be globally unique.
    fn name(&self) -> DataLoaderName;

    /// Loads data from a file on the local filesystem and sends it to `tx`.
    ///
    /// This is generally called when opening files with the Rerun CLI or via the open menu in the
    /// Rerun Viewer on native platforms.
    ///
    /// `settings.store_id` is a shared recording created by the file loading machinery:
    /// implementers can decide to use it or not (e.g. it might make sense to log all images with a
    /// similar name in a shared recording, while an rrd file is already its own recording).
    ///
    /// `path` isn't necessarily a _file_ path, but can be a directory as well: implementers are
    /// free to handle that however they decide.
    ///
    /// ## Error handling
    ///
    /// Most implementers of `load_from_path` are expected to be asynchronous in nature.
    ///
    /// Asynchronous implementers should make sure to fail early (and thus synchronously) when
    /// possible (e.g. didn't even manage to open the file).
    /// Otherwise, they should log errors that happen in an asynchronous context.
    ///
    /// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible
    /// with a [`DataLoaderError::Incompatible`] error.
    #[cfg(not(target_arch = "wasm32"))]
    fn load_from_path(
        &self,
        settings: &DataLoaderSettings,
        path: std::path::PathBuf,
        tx: std::sync::mpsc::Sender<LoadedData>,
    ) -> Result<(), DataLoaderError>;

    /// Loads data from in-memory file contents and sends it to `tx`.
    ///
    /// This is generally called when opening files via drag-and-drop or when using the web viewer.
    ///
    /// `settings.store_id` is a shared recording created by the file loading machinery:
    /// implementers can decide to use it or not (e.g. it might make sense to log all images with a
    /// similar name in a shared recording, while an rrd file is already its own recording).
    ///
    /// The `filepath` of the file is given for informational purposes (e.g. to extract the file's
    /// extension): implementers should _not_ try to read from disk as there likely isn't a
    /// filesystem available to begin with.
    /// `filepath` is guaranteed to be a file path.
    ///
    /// When running on the web (wasm), `filepath` only contains the file name.
    ///
    /// ## Error handling
    ///
    /// Most implementers of `load_from_file_contents` are expected to be asynchronous in nature.
    ///
    /// Asynchronous implementers should make sure to fail early (and thus synchronously) when
    /// possible (e.g. didn't even manage to open the file).
    /// Otherwise, they should log errors that happen in an asynchronous context.
    ///
    /// If a [`DataLoader`] has no interest in the given file, it should fail as soon as possible
    /// with a [`DataLoaderError::Incompatible`] error.
    fn load_from_file_contents(
        &self,
        settings: &DataLoaderSettings,
        filepath: std::path::PathBuf,
        contents: std::borrow::Cow<'_, [u8]>,
        tx: std::sync::mpsc::Sender<LoadedData>,
    ) -> Result<(), DataLoaderError>;
}
314
/// Errors that might happen when loading data through a [`DataLoader`].
#[derive(thiserror::Error, Debug)]
pub enum DataLoaderError {
    /// An I/O error. This variant does not exist on `wasm32`.
    #[cfg(not(target_arch = "wasm32"))]
    #[error(transparent)]
    IO(#[from] std::io::Error),

    /// A [`re_chunk::ChunkError`], displayed transparently.
    #[error(transparent)]
    Arrow(#[from] re_chunk::ChunkError),

    /// A [`re_log_encoding::decoder::DecodeError`], displayed transparently.
    #[error(transparent)]
    Decode(#[from] re_log_encoding::decoder::DecodeError),

    /// The loader does not support the file at the given path.
    ///
    /// This is what a [`DataLoader`] should return, as early as possible, when it has no
    /// interest in a file.
    #[error("No data-loader support for {0:?}")]
    Incompatible(std::path::PathBuf),

    /// Any other error, carried as an [`anyhow::Error`].
    #[error(transparent)]
    Other(#[from] anyhow::Error),
}
334
335impl DataLoaderError {
336    #[inline]
337    pub fn is_path_not_found(&self) -> bool {
338        match self {
339            #[cfg(not(target_arch = "wasm32"))]
340            Self::IO(err) => err.kind() == std::io::ErrorKind::NotFound,
341            _ => false,
342        }
343    }
344
345    #[inline]
346    pub fn is_incompatible(&self) -> bool {
347        matches!(self, Self::Incompatible { .. })
348    }
349}
350
/// What [`DataLoader`]s load.
///
/// This makes it trivial for [`DataLoader`]s to build the data in whatever form is
/// most convenient for them, whether it is raw components, arrow chunks or even
/// full-on [`LogMsg`]s.
///
/// The first field of every variant is the name of the [`DataLoader`] that produced the data.
pub enum LoadedData {
    /// A [`Chunk`], together with the [`re_log_types::StoreId`] it is destined for.
    Chunk(DataLoaderName, re_log_types::StoreId, Chunk),

    /// An [`ArrowMsg`], together with the [`re_log_types::StoreId`] it is destined for.
    ArrowMsg(DataLoaderName, re_log_types::StoreId, ArrowMsg),

    /// A complete [`LogMsg`], passed through unchanged by [`LoadedData::into_log_msg`].
    LogMsg(DataLoaderName, LogMsg),
}
361
362impl LoadedData {
363    /// Returns the name of the [`DataLoader`] that generated this data.
364    #[inline]
365    pub fn data_loader_name(&self) -> &DataLoaderName {
366        match self {
367            Self::Chunk(name, ..) | Self::ArrowMsg(name, ..) | Self::LogMsg(name, ..) => name,
368        }
369    }
370
371    /// Pack the data into a [`LogMsg`].
372    #[inline]
373    pub fn into_log_msg(self) -> ChunkResult<LogMsg> {
374        match self {
375            Self::Chunk(_name, store_id, chunk) => {
376                Ok(LogMsg::ArrowMsg(store_id, chunk.to_arrow_msg()?))
377            }
378
379            Self::ArrowMsg(_name, store_id, msg) => Ok(LogMsg::ArrowMsg(store_id, msg)),
380
381            Self::LogMsg(_name, msg) => Ok(msg),
382        }
383    }
384}
385
386// ----------------------------------------------------------------------------
387
/// Keeps track of all builtin [`DataLoader`]s.
///
/// Lazy initialized the first time a file is opened.
///
/// [`iter_loaders`] yields these in the order listed here, before any custom loader.
static BUILTIN_LOADERS: Lazy<Vec<Arc<dyn DataLoader>>> = Lazy::new(|| {
    vec![
        // The cast on the first element fixes the element type of the `Vec` to the trait object.
        Arc::new(RrdLoader) as Arc<dyn DataLoader>,
        Arc::new(ArchetypeLoader),
        Arc::new(DirectoryLoader),
        // The two loaders below are only compiled on native targets.
        #[cfg(not(target_arch = "wasm32"))]
        Arc::new(LeRobotDatasetLoader),
        #[cfg(not(target_arch = "wasm32"))]
        Arc::new(ExternalLoader),
    ]
});
402
403/// Iterator over all registered [`DataLoader`]s.
404#[inline]
405pub fn iter_loaders() -> impl Iterator<Item = Arc<dyn DataLoader>> {
406    BUILTIN_LOADERS
407        .clone()
408        .into_iter()
409        .chain(CUSTOM_LOADERS.read().clone())
410}
411
/// Keeps track of all custom [`DataLoader`]s.
///
/// Use [`register_custom_data_loader`] to add new loaders.
///
/// Starts out empty; guarded by a read-write lock so that loaders can be registered at runtime.
static CUSTOM_LOADERS: Lazy<parking_lot::RwLock<Vec<Arc<dyn DataLoader>>>> =
    Lazy::new(parking_lot::RwLock::default);
417
418/// Register a custom [`DataLoader`].
419///
420/// Any time the Rerun Viewer opens a file or directory, this custom loader will be notified.
421/// Refer to [`DataLoader`]'s documentation for more information.
422#[inline]
423pub fn register_custom_data_loader(loader: impl DataLoader + 'static) {
424    CUSTOM_LOADERS.write().push(Arc::new(loader));
425}
426
427// ----------------------------------------------------------------------------
428
/// Returns the ASCII-lowercased extension of `path`.
///
/// Empty string if no extension. Parts of the extension that are not valid Unicode are
/// converted lossily.
#[inline]
pub(crate) fn extension(path: &std::path::Path) -> String {
    match path.extension() {
        Some(ext) => ext.to_string_lossy().to_ascii_lowercase(),
        None => String::new(),
    }
}
438
439// ----------------------------------------------------------------------------
440
/// Image file extensions supported by our builtin [`DataLoader`]s (lowercase, without the dot).
//
// …given that all feature flags are turned on for the `image` crate.
pub const SUPPORTED_IMAGE_EXTENSIONS: &[&str] = &[
    "avif", "bmp", "dds", "exr", "farbfeld", "ff", "gif", "hdr", "ico", "jpeg", "jpg", "pam",
    "pbm", "pgm", "png", "ppm", "tga", "tif", "tiff", "webp",
];

/// Video file extensions supported by our builtin [`DataLoader`]s (lowercase, without the dot).
pub const SUPPORTED_VIDEO_EXTENSIONS: &[&str] = &["mp4"];

/// 3D model file extensions supported by our builtin [`DataLoader`]s (lowercase, without the dot).
pub const SUPPORTED_MESH_EXTENSIONS: &[&str] = &["glb", "gltf", "obj", "stl"];

/// Point cloud file extensions supported by our builtin [`DataLoader`]s (lowercase, without the dot).
// TODO(#4532): `.ply` data loader should support 2D point cloud & meshes
pub const SUPPORTED_POINT_CLOUD_EXTENSIONS: &[&str] = &["ply"];

/// Rerun file extensions supported by our builtin [`DataLoader`]s (lowercase, without the dot).
pub const SUPPORTED_RERUN_EXTENSIONS: &[&str] = &["rbl", "rrd"];

/// Text file extensions supported by our builtin [`DataLoader`]s (lowercase, without the dot).
// TODO(#4555): Add catch-all builtin `DataLoader` for text files
pub const SUPPORTED_TEXT_EXTENSIONS: &[&str] = &["txt", "md"];
458
459/// All file extension supported by our builtin [`DataLoader`]s.
460pub fn supported_extensions() -> impl Iterator<Item = &'static str> {
461    SUPPORTED_RERUN_EXTENSIONS
462        .iter()
463        .chain(SUPPORTED_IMAGE_EXTENSIONS)
464        .chain(SUPPORTED_VIDEO_EXTENSIONS)
465        .chain(SUPPORTED_MESH_EXTENSIONS)
466        .chain(SUPPORTED_POINT_CLOUD_EXTENSIONS)
467        .chain(SUPPORTED_TEXT_EXTENSIONS)
468        .copied()
469}
470
471/// Is this a supported file extension by any of our builtin [`DataLoader`]s?
472pub fn is_supported_file_extension(extension: &str) -> bool {
473    SUPPORTED_IMAGE_EXTENSIONS.contains(&extension)
474        || SUPPORTED_VIDEO_EXTENSIONS.contains(&extension)
475        || SUPPORTED_MESH_EXTENSIONS.contains(&extension)
476        || SUPPORTED_POINT_CLOUD_EXTENSIONS.contains(&extension)
477        || SUPPORTED_RERUN_EXTENSIONS.contains(&extension)
478        || SUPPORTED_TEXT_EXTENSIONS.contains(&extension)
479}