grabapl 0.0.4

A library for graph-based programming languages, with pluggable type systems and a focus on visible intermediate states.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
use crate::graph::GraphTrait;
use crate::operation::marker::MarkerSet;
use crate::operation::{OperationError, OperationResult};
use crate::semantics::AbstractGraph;
use crate::util::bimap::BiMap;
use crate::util::{InternString, log};
use crate::{NodeKey, Semantics, SubstMarker, interned_string_newtype};
use derive_more::From;
use error_stack::bail;
use petgraph::visit::UndirectedAdaptor;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::cell::RefCell;
use std::collections::{HashMap, HashSet};
use thiserror::Error;
// TODO: rename/move these structs and file. 'pattern.rs' is an outdated term.
// renamed.

#[derive(Debug, Error)]
pub enum OperationParameterError {
    #[error(
        "Context node {0:?} is not connected to any explicit input nodes in the parameter graph"
    )]
    ContextNodeNotConnected(SubstMarker),
}

#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(bound = "S: crate::serde::SemanticsSerde")
)]
pub struct OperationParameter<S: Semantics> {
    /// The ordered input nodes that must be explicitly selected.
    pub explicit_input_nodes: Vec<SubstMarker>,
    /// The initial abstract state that the operation expects.
    pub parameter_graph: AbstractGraph<S>,
    // TODO: Actually, because an operation may accept the same node multiple times, we may want to to have the inverse actually be a multimap? so NodeKey -> Vec<SubstMarker>
    //  (^ is if we support node aliasing.)
    /// Associates node keys of the parameter graph with the substitution markers.
    pub node_keys_to_subst: BiMap<NodeKey, SubstMarker>,
}

impl<S: Semantics> PartialEq for OperationParameter<S> {
    fn eq(&self, other: &Self) -> bool {
        // TODO: we could lift the requirement of "with_same_keys" if we remapped based on SubstMarker.
        self.explicit_input_nodes == other.explicit_input_nodes
            && self
                .parameter_graph
                .semantically_matches_with_same_keys(&other.parameter_graph)
            && self.node_keys_to_subst == other.node_keys_to_subst
    }
}

impl<S: Semantics> Clone for OperationParameter<S> {
    fn clone(&self) -> Self {
        OperationParameter {
            explicit_input_nodes: self.explicit_input_nodes.clone(),
            parameter_graph: self.parameter_graph.clone(),
            node_keys_to_subst: self.node_keys_to_subst.clone(),
        }
    }
}

impl<S: Semantics> OperationParameter<S> {
    pub fn new_empty() -> Self {
        OperationParameter {
            explicit_input_nodes: Vec::new(),
            parameter_graph: AbstractGraph::<S>::new(),
            node_keys_to_subst: BiMap::new(),
        }
    }

    pub fn check_validity(&self) -> Result<(), OperationParameterError> {
        // we want weak connected components, hence we use UndirectedAdaptor
        let undi = UndirectedAdaptor(&self.parameter_graph.graph);
        let components = petgraph::algo::tarjan_scc(&undi);

        for component in components {
            let mut contains_explicit_input = false;
            for key in &component {
                let subst_marker = self
                    .node_keys_to_subst
                    .get_left(key)
                    .expect("internal error: should find subst marker for node key");
                if self.explicit_input_nodes.contains(subst_marker) {
                    contains_explicit_input = true;
                    break;
                }
            }
            if !contains_explicit_input {
                let example_context_node = component[0];
                let subst_marker = self
                    .node_keys_to_subst
                    .get_left(&example_context_node)
                    .expect("internal error: should find subst marker for node key");
                return Err(OperationParameterError::ContextNodeNotConnected(
                    *subst_marker,
                ));
            }
        }

        Ok(())
    }
}

/// The result of trying to bind an abstract graph to a parameter graph.
// Note: this is a mapping from OperationParameter substmarkers to the dynamic/argument graph node keys.
#[derive(Debug)]
pub struct ParameterSubstitution {
    pub mapping: HashMap<SubstMarker, NodeKey>,
}

impl ParameterSubstitution {
    pub fn new(mapping: HashMap<SubstMarker, NodeKey>) -> Self {
        ParameterSubstitution { mapping }
    }

    pub fn infer_explicit_for_param(
        selected_nodes: &[NodeKey],
        param: &OperationParameter<impl Semantics>,
    ) -> OperationResult<Self> {
        if param.explicit_input_nodes.len() != selected_nodes.len() {
            bail!(OperationError::InvalidOperationArgumentCount {
                expected: param.explicit_input_nodes.len(),
                actual: selected_nodes.len(),
            });
        }

        let mapping = param
            .explicit_input_nodes
            .iter()
            .zip(selected_nodes.iter())
            .map(|(subst_marker, node_key)| (subst_marker.clone(), *node_key))
            .collect();
        Ok(ParameterSubstitution { mapping })
    }
}

#[derive(Debug, Clone, Copy, From, Hash, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum NewNodeMarker {
    Named(InternString),
    // TODO: hide this
    #[from(ignore)]
    Implicit(u32),
}
interned_string_newtype!(NewNodeMarker, NewNodeMarker::Named);

/// Used inside GraphWithSubstitution to refer to nodes.
#[derive(Debug, Clone, Copy, From, Hash, Eq, PartialEq)]
pub enum NodeMarker {
    Subst(SubstMarker),
    New(NewNodeMarker),
}

pub struct GraphWithSubstitution<'a, G: GraphTrait> {
    pub graph: &'a mut G,
    /// Maps operation-defined SubstMarkers to the dynamic graph node keys.
    pub subst: &'a ParameterSubstitution,
    /// Maps newly created nodes to their SubstMarker.
    new_nodes_map: HashMap<NewNodeMarker, NodeKey>,
    max_new_node_marker: u32,
    /// Keeps track of changes done to the graph.
    new_nodes: Vec<NodeKey>,
    new_edges: Vec<(NodeKey, NodeKey)>,
    removed_nodes: Vec<NodeKey>,
    removed_edges: Vec<(NodeKey, NodeKey)>,
    changed_node_av: HashMap<NodeKey, G::NodeAttr>,
    changed_edge_av: HashMap<(NodeKey, NodeKey), G::EdgeAttr>,
}

impl<'a, G: GraphTrait<NodeAttr: Clone, EdgeAttr: Clone>> GraphWithSubstitution<'a, G> {
    pub fn new(graph: &'a mut G, subst: &'a ParameterSubstitution) -> Self {
        GraphWithSubstitution {
            graph,
            subst,
            new_nodes_map: HashMap::new(),
            max_new_node_marker: 0,
            new_nodes: Vec::new(),
            new_edges: Vec::new(),
            removed_nodes: Vec::new(),
            removed_edges: Vec::new(),
            changed_node_av: HashMap::new(),
            changed_edge_av: HashMap::new(),
        }
    }

    pub fn get_node_key(&self, marker: &NodeMarker) -> Option<NodeKey> {
        let found_key = match marker {
            NodeMarker::Subst(sm) => {
                // If the marker is a SubstMarker, we look it up in the substitution mapping.
                self.subst.mapping.get(&sm).copied()
            }
            NodeMarker::New(nnm) => self.new_nodes_map.get(&nnm).copied(),
        };
        if let Some(key) = found_key {
            if self.removed_nodes.contains(&key) {
                // don't return a key that was removed
                return None;
            }
        }
        found_key
    }

    pub fn new_node_marker(&mut self) -> NewNodeMarker {
        let marker = NewNodeMarker::Implicit(self.max_new_node_marker);
        self.max_new_node_marker += 1;
        marker
    }

    pub fn add_node(&mut self, marker: impl Into<NewNodeMarker>, value: G::NodeAttr) {
        let marker = marker.into();
        // TODO: make this error
        if self.get_node_key(&NodeMarker::New(marker)).is_some() {
            // TODO: should we disallow re-adding a node that was deleted? if so,
            //  the above should be changed since it skips previously removed nodes
            panic!(
                "Marker {:?} already exists in the substitution mapping",
                marker
            );
        }
        let node_key = self.graph.add_node(value);
        self.new_nodes.push(node_key);
        self.new_nodes_map.insert(marker, node_key);
    }
    pub fn delete_node(&mut self, marker: impl Into<NodeMarker>) -> Option<G::NodeAttr> {
        let marker = marker.into();
        let Some(node_key) = self.get_node_key(&marker) else {
            return None; // Node not found in the substitution or new nodes.
        };
        let removed_value = self.graph.delete_node(node_key);
        if removed_value.is_some() {
            self.removed_nodes.push(node_key);
        }
        removed_value
    }

    pub fn add_edge(
        &mut self,
        src_marker: impl Into<NodeMarker>,
        dst_marker: impl Into<NodeMarker>,
        value: G::EdgeAttr,
    ) -> Option<G::EdgeAttr> {
        let src_marker = src_marker.into();
        let dst_marker = dst_marker.into();
        let src_key = self.get_node_key(&src_marker)?;
        let dst_key = self.get_node_key(&dst_marker)?;
        self.new_edges.push((src_key, dst_key));
        self.graph.add_edge(src_key, dst_key, value)
    }

    pub fn delete_edge(
        &mut self,
        src_marker: impl Into<NodeMarker>,
        dst_marker: impl Into<NodeMarker>,
    ) -> Option<G::EdgeAttr> {
        let src_marker = src_marker.into();
        let dst_marker = dst_marker.into();
        let src_key = self.get_node_key(&src_marker)?;
        let dst_key = self.get_node_key(&dst_marker)?;
        let removed_value = self.graph.delete_edge(src_key, dst_key);
        if removed_value.is_some() {
            self.removed_edges.push((src_key, dst_key));
        }
        removed_value
    }

    pub fn get_node_value(&self, marker: impl Into<NodeMarker>) -> Option<&G::NodeAttr> {
        let marker = marker.into();
        self.get_node_key(&marker)
            .and_then(|node_key| self.graph.get_node_attr(node_key))
    }

    // TODO: in general these functions are a bit awkward (set_edge_value too), since they return
    //  an optional. If the optional is None, it means the node/edge does not exist, and no value was set.
    //  Can we communicate this better?
    pub fn set_node_value(
        &mut self,
        marker: impl Into<NodeMarker>,
        value: G::NodeAttr,
    ) -> Option<G::NodeAttr> {
        let marker = marker.into();
        let node_key = self.get_node_key(&marker)?;
        self.changed_node_av.insert(node_key, value.clone());
        let old_value = self.graph.set_node_attr(node_key, value.clone());
        if old_value.is_some() {
            // we only changed it if it exists, by semantics of set_node_attr
            self.changed_node_av.insert(node_key, value);
        }
        old_value
    }

    /// Use this method to apply another operation's maybe-changes to the graph.
    /// This will update the current view of the node's AV to be sound, i.e., the join of the current AV and the maybe-written AV,
    /// but will remember that it was only the maybe-written AV that was _actually_ maybe written.
    pub fn maybe_set_node_value(
        &mut self,
        marker: impl Into<NodeMarker>,
        maybe_written_av: G::NodeAttr,
        join: impl Fn(&G::NodeAttr, &G::NodeAttr) -> Option<G::NodeAttr>,
    ) -> Option<G::NodeAttr> {
        let marker = marker.into();
        let node_key = self.get_node_key(&marker)?;
        if let Some(old_av) = self.graph.get_node_attr(node_key) {
            // only remember that we maybe wrote "maybe_written_av".
            self.changed_node_av
                .insert(node_key, maybe_written_av.clone());
            // Merge the current AV with the new value.
            let merged_av = join(old_av, &maybe_written_av)
                // TODO: expect is bad here and for edges.
                //  We need to hide the node (just like when merging IntermediateStates) if the join does not exist.
                //  This unwrap means semantics like the following will panic:
                //  `int` type, `str` type, no join. Function takes `str`, maybe-writes `int` to it.
                //  at the call-site of the outer operation, we must now join (i.e., this function) the `int` and `str` types.
                //  This is something that makes sense, and should be supported.
                .expect("must be able to join. TODO: think about if this requirement makes sense");
            // merged_av is the new value we want to set.
            self.graph.set_node_attr(node_key, merged_av)
        } else {
            None
        }
    }

    pub fn get_edge_value(
        &self,
        src_marker: impl Into<NodeMarker>,
        dst_marker: impl Into<NodeMarker>,
    ) -> Option<&G::EdgeAttr> {
        let src_marker = src_marker.into();
        let dst_marker = dst_marker.into();
        let src_key = self.get_node_key(&src_marker)?;
        let dst_key = self.get_node_key(&dst_marker)?;
        self.graph.get_edge_attr((src_key, dst_key))
    }

    pub fn set_edge_value(
        &mut self,
        src_marker: impl Into<NodeMarker>,
        dst_marker: impl Into<NodeMarker>,
        value: G::EdgeAttr,
    ) -> Option<G::EdgeAttr> {
        let src_marker = src_marker.into();
        let dst_marker = dst_marker.into();
        let src_key = self.get_node_key(&src_marker)?;
        let dst_key = self.get_node_key(&dst_marker)?;
        self.changed_edge_av
            .insert((src_key, dst_key), value.clone());
        let old_value = self.graph.set_edge_attr((src_key, dst_key), value.clone());
        if old_value.is_some() {
            // we only changed it if it exists, by semantics of set_edge_attr
            self.changed_edge_av.insert((src_key, dst_key), value);
        } else {
            log::warn!(
                "Attempted to set edge value for non-existing edge from {:?} to {:?}.",
                src_key,
                dst_key
            );
        }
        old_value
    }

    pub fn maybe_set_edge_value(
        &mut self,
        src_marker: impl Into<NodeMarker>,
        dst_marker: impl Into<NodeMarker>,
        maybe_written_av: G::EdgeAttr,
        join: impl Fn(&G::EdgeAttr, &G::EdgeAttr) -> Option<G::EdgeAttr>,
    ) -> Option<G::EdgeAttr> {
        let src_marker = src_marker.into();
        let dst_marker = dst_marker.into();
        let src_key = self.get_node_key(&src_marker)?;
        let dst_key = self.get_node_key(&dst_marker)?;
        if let Some(old_av) = self.graph.get_edge_attr((src_key, dst_key)) {
            // only remember that we maybe wrote "maybe_writte_av".
            self.changed_edge_av
                .insert((src_key, dst_key), maybe_written_av.clone());
            // Merge the current AV with the new value.
            let merged_av = join(old_av, &maybe_written_av)
                .expect("must be able to join. TODO: think about if this requirement makes sense");
            // merged_av is the new value we want to set.
            self.graph.set_edge_attr((src_key, dst_key), merged_av)
        } else {
            log::warn!(
                "Attempted to set edge value for non-existing edge from {:?} to {:?}.",
                src_key,
                dst_key
            );
            None
        }
    }

    fn get_new_nodes_and_edges_from_desired_names(
        &self,
        desired_node_output_names: &HashMap<NewNodeMarker, AbstractOutputNodeMarker>,
    ) -> (
        HashMap<AbstractOutputNodeMarker, NodeKey>,
        Vec<(NodeKey, NodeKey)>,
    ) {
        let mut new_nodes = HashMap::new();
        for (marker, node_key) in &self.new_nodes_map {
            let Some(output_marker) = desired_node_output_names.get(&marker) else {
                continue;
            };
            new_nodes.insert(*output_marker, *node_key);
        }
        let mut new_edges = Vec::new();
        let new_node_or_existing = |node_key: &NodeKey| {
            new_nodes.values().any(|&n| n == *node_key)
                || self.subst.mapping.values().any(|&n| n == *node_key)
        };
        // only include edges that belong to nodes that are in new_nodes and/or the existing graph
        for (src_key, dst_key) in &self.new_edges {
            if new_node_or_existing(src_key) || new_node_or_existing(dst_key) {
                new_edges.push((*src_key, *dst_key));
            }
        }
        (new_nodes, new_edges)
    }

    pub fn get_abstract_output<
        S: Semantics<NodeAbstract = G::NodeAttr, EdgeAbstract = G::EdgeAttr>,
    >(
        &self,
        desired_node_output_names: HashMap<NewNodeMarker, AbstractOutputNodeMarker>,
    ) -> AbstractOperationOutput<S> {
        // TODO: does this make sense? why would an operation abstractly add a node but then not return it?
        let (new_nodes, new_edges) =
            self.get_new_nodes_and_edges_from_desired_names(&desired_node_output_names);

        // Only report changed av's for nodes and edges that are params
        // TODO: make sure the changed_av_nodes/edges skip deleted nodes/edges.
        let existing_nodes: HashSet<NodeKey> = self.subst.mapping.values().cloned().collect();
        let mut existing_edges = HashSet::new();
        for (src, dst, _) in self.graph.edges() {
            existing_edges.insert((src, dst));
        }
        let mut changed_abstract_values_nodes = HashMap::new();
        for (node_key, node_av) in &self.changed_node_av {
            if existing_nodes.contains(node_key) {
                changed_abstract_values_nodes.insert(*node_key, node_av.clone());
            }
        }
        let mut changed_abstract_edges = HashMap::new();
        for (&(src, dst), edge_av) in &self.changed_edge_av {
            if existing_edges.contains(&(src, dst)) {
                changed_abstract_edges.insert((src, dst), edge_av.clone());
            }
        }

        AbstractOperationOutput {
            new_nodes,
            // TODO: see if we want to also make the new edges optional?
            new_edges,
            // TODO: make this better? in theory, if a user were to add a node and then remove it again, same for edges, these containers
            //  would contain too much information.
            removed_edges: self.removed_edges.clone(),
            removed_nodes: self.removed_nodes.clone(),
            changed_abstract_values_nodes,
            changed_abstract_values_edges: changed_abstract_edges,
        }
    }

    pub fn get_concrete_output(
        &self,
        desired_node_output_names: HashMap<NewNodeMarker, AbstractOutputNodeMarker>,
    ) -> OperationOutput {
        let (new_nodes, _new_edges) =
            self.get_new_nodes_and_edges_from_desired_names(&desired_node_output_names);

        OperationOutput {
            new_nodes,
            // TODO: again, make sure this makes sense
            removed_nodes: self.removed_nodes.clone(),
        }
    }
}

#[derive(Debug)]
pub struct OperationArgument<'a> {
    pub selected_input_nodes: Cow<'a, [NodeKey]>,
    /// We know this substitution statically already, since we define our parameter substitutions statically.
    /// So we can store it in this struct.
    pub subst: ParameterSubstitution,
    /// Nodes for which an operation already has an in-scope handle.
    /// These nodes may not be matched in shape queries, as they could modify values and break the operation's
    /// abstract guarantees, since changes are not visible to outer operations.
    pub hidden_nodes: HashSet<NodeKey>,
    pub marker_set: &'a RefCell<MarkerSet>,
}

#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq, From)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct AbstractOutputNodeMarker(pub InternString);
interned_string_newtype!(AbstractOutputNodeMarker);

/// Keeps track of node changes that happened during the operation execution.
///
/// This is mainly useful for keeping track of which nodes still exist after the operation,
/// without needing to scan the entire graph for changes.
///
/// It also allows operations to name their output nodes with `AbstractOutputNodeMarker`.
pub struct OperationOutput {
    pub new_nodes: HashMap<AbstractOutputNodeMarker, NodeKey>,
    // TODO: figure out if this is needed in the concrete? This can be used in the UDO runner to remove nodes from the mapping,
    //  but is there actually a downside to keeping nodes in the mapping? Think about edge cases.
    pub removed_nodes: Vec<NodeKey>,
}

impl OperationOutput {
    pub fn no_changes() -> Self {
        OperationOutput {
            new_nodes: HashMap::new(),
            removed_nodes: Vec::new(),
        }
    }
}

pub struct AbstractOperationOutput<S: Semantics> {
    pub new_nodes: HashMap<AbstractOutputNodeMarker, NodeKey>,
    pub removed_nodes: Vec<NodeKey>,
    pub new_edges: Vec<(NodeKey, NodeKey)>,
    pub removed_edges: Vec<(NodeKey, NodeKey)>,
    /// These maps contain any abstract values that are set (not necessarily changed) during the operation execution.
    pub changed_abstract_values_nodes: HashMap<NodeKey, S::NodeAbstract>,
    pub changed_abstract_values_edges: HashMap<(NodeKey, NodeKey), S::EdgeAbstract>,
}
// TODO(severe): since this is basically an AID output, we must make sure that during *concrete* execution,
//  we don't accidentally overwrite the mapping from AID to NodeKey from some existing operation.
//  This is because the OperationOutput of an `abstract_apply` could be empty, so we *dont know* that
//  we actually got an output node in the concrete graph. If that new concrete graph node has a clashing
//  name, we overwrite the potential existing mapping, which would cause a logic bug.
// TODO: add ^ to problems-testcases.md
// TODO: Would we fix this if we said OperationOutput is stored by userdefinedoperation
//  in the abstract, i.e., it has to store the mapping?
//  Then at concrete execution time, the user defined op only updates mappings for nodes that
//  it actually expects from the abstract output.
// UPDATE: This *may* be a problem. However, it's unlikely to be a problem.
// For this to be a problem, an operation needs to _sometimes_ return a node. Simply adding a node is not enough,
// since the UDO runner only updates the mapping for _returned_ nodes (as told by the ...Output struct family).
// The only operations that can _sometimes_ return a node are builtin operations.
// Hence I would argue this is a documentation issue (or slight client-DX issue), in that we need to document the
// BuiltinOperation trait better to indicate that _returned_ nodes must *always* be returned, and also be specified
// as a returned node in the operation's signature.