//
// Copyright (c) 2021 - 2024 ZettaScale Technology
//
// This program and the accompanying materials are made available under the
// terms of the Eclipse Public License 2.0 which is available at
// http://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
// which is available at https://www.apache.org/licenses/LICENSE-2.0.
//
// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
//
// Contributors:
// ZettaScale Zenoh Team, <zenoh@zettascale.tech>
//
use crate::connectors::{ReceiverRecord, SenderRecord};
use anyhow::{anyhow, bail, Context};
use serde::{Deserialize, Serialize};
use std::{
collections::{HashMap, HashSet},
sync::Arc,
};
use uuid::Uuid;
use zenoh_flow_commons::{InstanceId, NodeId, Result, RuntimeId};
use zenoh_flow_descriptors::{
FlattenedDataFlowDescriptor, FlattenedOperatorDescriptor, FlattenedSinkDescriptor,
FlattenedSourceDescriptor, InputDescriptor, LinkDescriptor, OutputDescriptor,
};
use zenoh_keyexpr::OwnedKeyExpr;
/// Suffix appended to the id of the node at the origin of a link to build the id of the generated
/// [Sender](SenderRecord) (see [try_new](DataFlowRecord::try_new())).
const SENDER_SUFFIX: &str = "__zenoh_flow_sender";
/// Suffix appended to the id of the node at the destination of a link to build the id of the generated
/// [Receiver](ReceiverRecord) (see [try_new](DataFlowRecord::try_new())).
const RECEIVER_SUFFIX: &str = "__zenoh_flow_receiver";
/// A `DataFlowRecord` describes one concrete deployment of a [FlattenedDataFlowDescriptor] on an infrastructure, i.e.
/// on a set of Zenoh-Flow runtimes.
///
/// A `DataFlowRecord` is created by processing a [FlattenedDataFlowDescriptor] together with a default Zenoh-Flow
/// [runtime](RuntimeId) -- the runtime that will manage every node that has no explicit mapping. See the
/// [try_new](DataFlowRecord::try_new()) method.
///
/// Compared to a [FlattenedDataFlowDescriptor], a [DataFlowRecord]:
/// - maps every node to a Zenoh-Flow runtime,
/// - holds two additional kinds of nodes, [Sender](SenderRecord) and [Receiver](ReceiverRecord), which take care of
///   connecting nodes that are running on different Zenoh-Flow runtimes.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct DataFlowRecord {
    /// Unique identifier of this instance of the data flow.
    pub(crate) id: InstanceId,
    /// Name of the data flow, as found in the descriptor the record was generated from.
    pub(crate) name: Arc<str>,
    /// Sources of the data flow, indexed by their node id.
    pub(crate) sources: HashMap<NodeId, FlattenedSourceDescriptor>,
    /// Operators of the data flow, indexed by their node id.
    pub(crate) operators: HashMap<NodeId, FlattenedOperatorDescriptor>,
    /// Sinks of the data flow, indexed by their node id.
    pub(crate) sinks: HashMap<NodeId, FlattenedSinkDescriptor>,
    /// Generated Senders, indexed by their node id.
    pub(crate) senders: HashMap<NodeId, SenderRecord>,
    /// Generated Receivers, indexed by their node id.
    pub(crate) receivers: HashMap<NodeId, ReceiverRecord>,
    /// Links between the nodes, including the links going through the generated Senders and Receivers.
    pub(crate) links: Vec<LinkDescriptor>,
    /// For each Zenoh-Flow runtime, the set of nodes it manages.
    pub(crate) mapping: HashMap<RuntimeId, HashSet<NodeId>>,
}
impl DataFlowRecord {
/// Attempts to create a [DataFlowRecord] from the provided [FlattenedDataFlowDescriptor], assigning nodes without
/// a mapping to the default [runtime](RuntimeId).
///
/// If the [FlattenedDataFlowDescriptor] did not specify a unique identifier, one will be randomly generated.
///
/// # Errors
///
/// The creation of the [DataFlowRecord] should, in theory, not fail. The only failure point is during the creation
/// of the connectors: the [Sender](SenderRecord) and [Receiver](ReceiverRecord) that are automatically generated
/// when two nodes that need to communicate are located on different runtimes.
///
/// To generate these connectors, a Zenoh key expression is computed. Computing this expression can result in an
/// error if the [NodeId] or [PortId](zenoh_flow_commons::PortId) are not valid chunks (see Zenoh's
/// [keyexpr](https://docs.rs/zenoh-keyexpr/0.10.1-rc/zenoh_keyexpr/key_expr/struct.keyexpr.html) documentation for
/// more details).
///
/// Node that this should not happen if the [FlattenedDataFlowDescriptor] was obtained by parsing and flattening a
/// [DataFlowDescriptor](zenoh_flow_descriptors::DataFlowDescriptor).
pub fn try_new(
data_flow: &FlattenedDataFlowDescriptor,
default_runtime: &RuntimeId,
) -> Result<Self> {
let FlattenedDataFlowDescriptor {
id,
name,
sources,
operators,
sinks,
mut links,
mut mapping,
} = data_flow.clone();
let id = id.unwrap_or_else(|| Uuid::new_v4().into());
// Nodes that are not running on the same runtime need to be connected.
let mut additional_links = Vec::default();
let mut receivers = HashMap::default();
let mut senders = HashMap::default();
let mut default_mapping_if_unassigned = |node_id: &NodeId| {
for (_, nodes) in mapping.iter() {
if nodes.contains(node_id) {
return;
}
}
let runtime_entry = mapping
.entry(default_runtime.clone())
.or_insert_with(HashSet::default);
runtime_entry.insert(node_id.clone());
};
let sources = sources
.into_iter()
.map(|source| {
default_mapping_if_unassigned(&source.id);
(source.id.clone(), source)
})
.collect::<HashMap<_, _>>();
let operators = operators
.into_iter()
.map(|operator| {
default_mapping_if_unassigned(&operator.id);
(operator.id.clone(), operator)
})
.collect::<HashMap<_, _>>();
let sinks = sinks
.into_iter()
.map(|sink| {
default_mapping_if_unassigned(&sink.id);
(sink.id.clone(), sink)
})
.collect::<HashMap<_, _>>();
let try_get_mapping = |node_id: &NodeId| -> Result<&RuntimeId> {
for (runtime, nodes) in mapping.iter() {
if nodes.contains(node_id) {
return Ok(runtime);
}
}
bail!(
r#"
Zenoh-Flow encountered a fatal error: the node < {} > is not mapped to a runtime.
Is its name valid (i.e. does it reference an actual node)?
"#,
node_id
)
};
let mut additional_mappings: HashMap<RuntimeId, HashSet<NodeId>> = HashMap::default();
for link in links.iter_mut() {
let runtime_from = try_get_mapping(&link.from.node)
.context(format!("Failed to process link:\n{}", link))?;
let runtime_to = try_get_mapping(&link.to.node)
.context(format!("Failed to process link:\n{}", link))?;
if runtime_from != runtime_to {
let key_expr_str = format!("{}/{}/{}", id, link.from.node, link.from.output);
let key_expression =
OwnedKeyExpr::autocanonize(key_expr_str.clone()).map_err(|e| {
// NOTE: This error should not happen as we ensure that (i) all node ids and port ids are valid
// key expressions in their canonical form and (ii) they do not contain any of '*', '#', '?' or
// '$' characters (look for `deserialize_id` in zenoh_flow_commons).
anyhow!(
r#"
Zenoh-Flow encountered a fatal internal error: the key expression generated to connect the nodes < {} > and < {} > is
not valid:
{}
Caused by:
{:?}
"#,
link.from.node,
link.to.node,
key_expr_str,
e
)
})?;
let sender_id: NodeId = format!("{}{SENDER_SUFFIX}", link.from.node).into();
let receiver_id: NodeId = format!("{}{RECEIVER_SUFFIX}", link.to.node).into();
let mut input = InputDescriptor {
node: sender_id.clone(),
input: key_expression.to_string().into(),
};
input = std::mem::replace(&mut link.to, input);
let output = OutputDescriptor {
node: receiver_id.clone(),
output: key_expression.to_string().into(),
};
additional_links.push(LinkDescriptor {
from: output,
to: input,
#[cfg(feature = "shared-memory")]
shared_memory: link.shared_memory,
});
senders.insert(
sender_id.clone(),
SenderRecord {
id: sender_id.clone(),
resource: key_expression.clone(),
},
);
additional_mappings
.entry(runtime_from.clone())
.or_insert_with(HashSet::default)
.insert(sender_id);
receivers.insert(
receiver_id.clone(),
ReceiverRecord {
id: receiver_id.clone(),
resource: key_expression,
},
);
additional_mappings
.entry(runtime_to.clone())
.or_insert_with(HashSet::default)
.insert(receiver_id);
}
}
links.append(&mut additional_links);
additional_mappings
.into_iter()
.for_each(|(runtime_id, nodes)| {
mapping
.entry(runtime_id)
.or_insert_with(HashSet::default)
.extend(nodes);
});
Ok(Self {
id,
name,
sources,
operators,
sinks,
senders,
receivers,
links,
mapping,
})
}
/// Returns the unique identifier of this [`DataFlowRecord`].
///
/// # Performance
///
/// The id is internally stored behind an [`Arc`](std::sync::Arc) so there is limited overhead to cloning it.
pub fn instance_id(&self) -> &InstanceId {
&self.id
}
/// Returns the name of the data flow from which this [`DataFlowRecord`] was generated.
pub fn name(&self) -> &Arc<str> {
&self.name
}
/// Returns the mapping of the data flow: which Zenoh-Flow runtime manages which set of nodes.
pub fn mapping(&self) -> &HashMap<RuntimeId, HashSet<NodeId>> {
&self.mapping
}
/// Returns the set of [Senders](SenderRecord) of the data flow.
///
/// A [Sender](SenderRecord) sends data, through a publication on Zenoh, to [Receiver(s)](ReceiverRecord).
pub fn senders(&self) -> &HashMap<NodeId, SenderRecord> {
&self.senders
}
/// Returns the set of [Receivers](ReceiverRecord) of the data flow.
///
/// A [Receiver](ReceiverRecord) receives data, through a subscription on Zenoh, from [Sender(s)](SenderRecord).
pub fn receivers(&self) -> &HashMap<NodeId, ReceiverRecord> {
&self.receivers
}
/// Returns the set of links of the data flow: how the nodes are connected.
///
/// Compared to links found in a [FlattenedDataFlowDescriptor], the links in a [DataFlowRecord] have been updated to
/// take into account the [Sender](SenderRecord) and [Receiver](ReceiverRecord) connecting the Zenoh-Flow runtimes.
pub fn links(&self) -> &[LinkDescriptor] {
&self.links
}
/// Returns the set of [Source(s)](FlattenedSourceDescriptor) of the data flow.
///
/// A `Source` will feed external data in the data flow to be processed by downstream nodes.
pub fn sources(&self) -> &HashMap<NodeId, FlattenedSourceDescriptor> {
&self.sources
}
/// Returns the set of [Operator(s)](FlattenedOperatorDescriptor) of the data flow.
///
/// An `Operator` performs computation on the data it receives, either modifying it or producing new data that it
/// forwards to downstream nodes.
pub fn operators(&self) -> &HashMap<NodeId, FlattenedOperatorDescriptor> {
&self.operators
}
/// Returns the set of [Sink(s)](FlattenedSinkDescriptor) of the data flow.
///
/// A `Sink` exposes the result of the data flow pipeline, such that it can be ingested by external components.
pub fn sinks(&self) -> &HashMap<NodeId, FlattenedSinkDescriptor> {
&self.sinks
}
}
// Unit tests live in the sibling `tests.rs` file and are only compiled for test builds.
#[cfg(test)]
#[path = "tests.rs"]
mod tests;