1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
use std::fmt::Write as _;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use anyhow::Context as _;
use itertools::{Itertools as _, izip};
use re_chunk::Chunk;
// ---
// Command-line arguments of the .rrd comparison command, parsed through `clap::Parser`.
//
// NOTE: the notes added here are plain `//` comments on purpose. clap's derive API turns `///`
// doc comments into user-visible help text, so adding or editing doc comments on this struct
// would change the `--help` output.
#[derive(Debug, Clone, clap::Parser)]
pub struct CompareCommand {
// Path to the first .rrd file. It carries no `long`/`short` attribute; `run` converts it to a
// `PathBuf` and loads it as the first side of the comparison.
path_to_rrd1: String,
// Path to the second .rrd file, loaded the same way and compared against the first one.
path_to_rrd2: String,
/// If specified, the comparison will focus purely on semantics, ignoring order.
///
/// The Rerun data model is itself unordered, and because many of the internal pipelines are
/// asynchronous by nature, it is very easy to end up with semantically identical, but
/// differently ordered data.
/// In most cases, the distinction is irrelevant, and you'd rather the comparison succeeds.
#[clap(long, default_value_t = false)]
unordered: bool,
/// If specified, dumps both .rrd files as tables.
// The dump is printed to stdout by `run`, before the application ids are compared.
#[clap(long, default_value_t = false)]
full_dump: bool,
/// If specified, the comparison will ignore chunks without components.
// Forwarded to `load_chunks`, which then keeps only chunks with `num_components() > 0`.
#[clap(long, default_value_t = false)]
ignore_chunks_without_components: bool,
}
impl CompareCommand {
/// Checks whether two .rrd files are _similar_, i.e. not equal on a byte-level but
/// functionally equivalent.
///
/// Returns `Ok(())` if they match, or an error containing a detailed diff otherwise.
pub fn run(&self) -> anyhow::Result<()> {
let Self {
path_to_rrd1,
path_to_rrd2,
unordered,
full_dump,
ignore_chunks_without_components,
} = self;
re_log::debug!("Comparing {path_to_rrd1:?} to {path_to_rrd2:?}…");
let path_to_rrd1 = PathBuf::from(path_to_rrd1);
let path_to_rrd2 = PathBuf::from(path_to_rrd2);
let (app_id1, chunks1) = load_chunks(&path_to_rrd1, *ignore_chunks_without_components)
.with_context(|| format!("path: {path_to_rrd1:?}"))?;
let (app_id2, chunks2) = load_chunks(&path_to_rrd2, *ignore_chunks_without_components)
.with_context(|| format!("path: {path_to_rrd2:?}"))?;
if *full_dump {
println!("{app_id1}");
for chunk in &chunks1 {
println!("{chunk}");
}
println!("{app_id2}");
for chunk in &chunks2 {
println!("{chunk}");
}
}
anyhow::ensure!(
app_id1 == app_id2,
"Application IDs do not match: '{app_id1}' vs. '{app_id2}'"
);
fn format_chunk(chunk: &Chunk) -> String {
re_arrow_util::format_record_batch_opts(
&chunk.to_record_batch().expect("Cannot fail in practice"),
&re_arrow_util::RecordBatchFormatOpts {
width: Some(800),
max_cell_content_width: 100,
trim_field_names: false,
trim_metadata_keys: false,
trim_metadata_values: false,
..Default::default()
},
)
.to_string()
}
if *unordered {
let mut chunks2_remaining = chunks2;
let mut unmatched_chunks1 = Vec::new();
for chunk1 in &chunks1 {
if let Some(pos) = chunks2_remaining
.iter()
.position(|chunk2| re_chunk::Chunk::ensure_similar(chunk1, chunk2).is_ok())
{
chunks2_remaining.swap_remove(pos);
} else {
unmatched_chunks1.push(chunk1.clone());
}
}
if !unmatched_chunks1.is_empty() || !chunks2_remaining.is_empty() {
let mut error_msg = String::from("Unordered comparison failed:\n");
if !unmatched_chunks1.is_empty() {
writeln!(
error_msg,
"\n{} chunk(s) from {path_to_rrd1:?} could not be matched:",
unmatched_chunks1.len()
)
.ok();
for chunk in &unmatched_chunks1 {
writeln!(error_msg, "{}", format_chunk(chunk)).ok();
}
}
if !chunks2_remaining.is_empty() {
writeln!(
error_msg,
"\n{} chunk(s) from {path_to_rrd2:?} could not be matched:",
chunks2_remaining.len()
)
.ok();
for chunk in &chunks2_remaining {
writeln!(error_msg, "{}", format_chunk(chunk)).ok();
}
}
anyhow::bail!(error_msg);
}
} else {
anyhow::ensure!(
chunks1.len() == chunks2.len(),
"Number of Chunks does not match: '{}' vs. '{}'",
re_format::format_uint(chunks1.len()),
re_format::format_uint(chunks2.len()),
);
for (chunk1, chunk2) in izip!(chunks1, chunks2) {
re_chunk::Chunk::ensure_similar(&chunk1, &chunk2).with_context(|| {
format!(
"Chunks diff:\n{}",
similar_asserts::SimpleDiff::from_str(
&format_chunk(&chunk1),
&format_chunk(&chunk2),
&path_to_rrd1.display().to_string(),
&path_to_rrd2.display().to_string(),
),
)
})?;
}
}
re_log::debug!("{path_to_rrd1:?} and {path_to_rrd2:?} are similar enough.");
Ok(())
}
}
/// Given a path to an rrd file, builds up a `ChunkStore` and returns its contents a stream of
/// `Chunk`s.
///
/// Fails if there are more than one data recordings present in the rrd file.
fn load_chunks(
path_to_rrd: &Path,
ignore_chunks_without_components: bool,
) -> anyhow::Result<(re_log_types::ApplicationId, Vec<Arc<re_chunk::Chunk>>)> {
use re_entity_db::EntityDb;
use re_log_types::StoreId;
let rrd_file = std::fs::File::open(path_to_rrd).context("couldn't open rrd file contents")?;
let rrd_file = std::io::BufReader::new(rrd_file);
// TODO(#10730): if the legacy `StoreId` migration is removed from `Decoder`, this would break
// the ability of `rrd compare` pre-0.25 rrds. If we want to keep the ability to migrate here,
// then the pre-#10730 app id caching mechanism must somehow be ported here.
// TODO(ab): For pre-0.25 legacy data with `StoreId` missing their application id, the migration
// in `Decoder` requires `SetStoreInfo` to arrive before the corresponding `ArrowMsg`. Ideally
// this tool would cache orphan `ArrowMsg` until a matching `SetStoreInfo` arrives.
let mut stores: std::collections::HashMap<StoreId, EntityDb> = Default::default();
let decoder = re_log_encoding::DecoderApp::decode_lazy(rrd_file);
for msg in decoder {
let msg = msg.context("decode rrd message")?;
stores
.entry(msg.store_id().clone())
.or_insert_with(|| {
let enable_viewer_indexes = false; // that would just slow us down for no reason
re_entity_db::EntityDb::with_store_config(
msg.store_id().clone(),
enable_viewer_indexes,
// We must make sure not to do any store-side compaction during comparisons, or
// this will result in flaky roundtrips in some instances.
re_chunk_store::ChunkStoreConfig::ALL_DISABLED,
)
})
.add_log_msg(&msg)
.context("decode rrd file contents")?;
}
let mut stores = stores
.values()
.filter(|store| store.store_kind() == re_log_types::StoreKind::Recording)
.collect_vec();
anyhow::ensure!(!stores.is_empty(), "no data recording found in rrd file");
anyhow::ensure!(
stores.len() == 1,
"more than one data recording found in rrd file"
);
#[expect(clippy::unwrap_used)] // safe, ensured above
let store = stores.pop().unwrap();
let engine = store.storage_engine();
Ok((
store.application_id().clone(),
engine
.store()
.iter_physical_chunks()
.filter_map(|c| {
if ignore_chunks_without_components {
(c.num_components() > 0).then_some(c.clone())
} else {
Some(c.clone())
}
})
.collect_vec(),
))
}