Skip to main content

swh_graph/views/
subgraph.rs

1// Copyright (C) 2023-2025  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use std::collections::HashMap;
7use std::path::Path;
8
9use anyhow::{anyhow, Result};
10use webgraph::traits::labels::SortedIterator;
11
12use crate::arc_iterators::FlattenedSuccessorsIterator;
13use crate::graph::*;
14use crate::properties;
15use crate::{NodeConstraint, NodeType};
16
// Generates a public iterator type `$name` over `NodeId`s.
//
// * `$name`: name of the generated struct.
// * `$inner`: name of the type parameter standing for the wrapped iterator.
// * remaining tokens: the `next` method of the generated `Iterator`
//   implementation. It has access to the fields `inner`, `node`,
//   `node_filter` and `arc_filter`.
macro_rules! make_filtered_arcs_iterator {
    ($name:ident, $inner:ident, $( $next:tt )*) => {
        /// Arc iterator wrapping another iterator of [`NodeId`]s, together with
        /// the node whose arcs are listed and borrowed node and arc filters.
        pub struct $name<'a, $inner, NodeFilter, ArcFilter>
        where
            $inner: Iterator<Item = NodeId> + 'a,
            NodeFilter: Fn(NodeId) -> bool,
            ArcFilter: Fn(NodeId, NodeId) -> bool,
        {
            inner: $inner,
            node: NodeId,
            node_filter: &'a NodeFilter,
            arc_filter: &'a ArcFilter,
        }

        impl<'a, $inner, NodeFilter, ArcFilter> Iterator
            for $name<'a, $inner, NodeFilter, ArcFilter>
        where
            $inner: Iterator<Item = NodeId> + 'a,
            NodeFilter: Fn(NodeId) -> bool,
            ArcFilter: Fn(NodeId, NodeId) -> bool,
        {
            // Same type as the wrapped iterator's items.
            type Item = NodeId;

            $( $next )*
        }

        // SAFETY: filtering out elements out of an iterator preserves sortedness
        unsafe impl<'a, $inner, NodeFilter, ArcFilter> SortedIterator
            for $name<'a, $inner, NodeFilter, ArcFilter>
        where
            $inner: SortedIterator<Item = NodeId> + 'a,
            NodeFilter: Fn(NodeId) -> bool,
            ArcFilter: Fn(NodeId, NodeId) -> bool,
        {
        }
    }
}
52
53make_filtered_arcs_iterator! {
54    FilteredSuccessors,
55    Successors,
56    fn next(&mut self) -> Option<Self::Item> {
57        if !(self.node_filter)(self.node) {
58            return None;
59        }
60
61        self.inner
62            .by_ref()
63            .find(|&dst| (self.node_filter)(dst) && (self.arc_filter)(self.node, dst))
64    }
65}
66make_filtered_arcs_iterator! {
67    FilteredPredecessors,
68    Predecessors,
69    fn next(&mut self) -> Option<Self::Item> {
70        if !(self.node_filter)(self.node) {
71            return None;
72        }
73
74        self.inner
75            .by_ref()
76            .find(|&src| (self.node_filter)(src) && (self.arc_filter)(src, self.node))
77    }
78}
79
// Generates a public iterator type `$name` over `(NodeId, Labels)` pairs.
//
// * `$name`: name of the generated struct.
// * `$inner`: name of the type parameter standing for the wrapped iterator.
// * remaining tokens: the `next` method of the generated `Iterator`
//   implementation. It has access to the fields `inner`, `node`,
//   `node_filter` and `arc_filter`.
//
// The generated type can also be flattened through
// `IntoFlattenedLabeledArcsIterator` whenever `Labels` is iterable.
macro_rules! make_filtered_labeled_arcs_iterator {
    ($name:ident, $inner:ident, $( $next:tt )*) => {
        /// Labeled arc iterator wrapping another iterator of
        /// `(NodeId, Labels)` pairs, together with the node whose arcs are
        /// listed and borrowed node and arc filters.
        pub struct $name<'a, Labels, $inner, NodeFilter, ArcFilter>
        where
            $inner: Iterator<Item = (NodeId, Labels)> + 'a,
            NodeFilter: Fn(NodeId) -> bool,
            ArcFilter: Fn(NodeId, NodeId) -> bool,
        {
            inner: $inner,
            node: NodeId,
            node_filter: &'a NodeFilter,
            arc_filter: &'a ArcFilter,
        }

        impl<'a, Labels, $inner, NodeFilter, ArcFilter> Iterator
            for $name<'a, Labels, $inner, NodeFilter, ArcFilter>
        where
            $inner: Iterator<Item = (NodeId, Labels)> + 'a,
            NodeFilter: Fn(NodeId) -> bool,
            ArcFilter: Fn(NodeId, NodeId) -> bool,
        {
            // Same type as the wrapped iterator's items.
            type Item = (NodeId, Labels);

            $( $next )*
        }

        // SAFETY: filtering out elements out of an iterator preserves sortedness
        // 'Labels' itself does not need to be sorted because we only implement
        // SortedIterator on the outer iterator, not in the inner one.
        unsafe impl<'a, Labels, $inner, NodeFilter, ArcFilter> SortedIterator
            for $name<'a, Labels, $inner, NodeFilter, ArcFilter>
        where
            $inner: SortedIterator<Item = (NodeId, Labels)> + 'a,
            NodeFilter: Fn(NodeId) -> bool,
            ArcFilter: Fn(NodeId, NodeId) -> bool,
        {
        }

        impl<'a, Labels, $inner, NodeFilter, ArcFilter>
            IntoFlattenedLabeledArcsIterator<<Labels as IntoIterator>::Item>
            for $name<'a, Labels, $inner, NodeFilter, ArcFilter>
        where
            Labels: IntoIterator,
            $inner: Iterator<Item = (NodeId, Labels)> + 'a,
            NodeFilter: Fn(NodeId) -> bool,
            ArcFilter: Fn(NodeId, NodeId) -> bool,
        {
            type Flattened = FlattenedSuccessorsIterator<Self>;

            // Wraps this iterator into a `FlattenedSuccessorsIterator`.
            fn flatten_labels(self) -> Self::Flattened {
                FlattenedSuccessorsIterator::new(self)
            }
        }
    }
}
135
136make_filtered_labeled_arcs_iterator! {
137    FilteredLabeledSuccessors,
138    LabeledSuccessors,
139    fn next(&mut self) -> Option<Self::Item> {
140        if !(self.node_filter)(self.node) {
141            return None;
142        }
143        for (dst, label) in self.inner.by_ref() {
144            if (self.node_filter)(dst) && (self.arc_filter)(self.node, dst) {
145                return Some((dst, label))
146            }
147        }
148        None
149    }
150}
151make_filtered_labeled_arcs_iterator! {
152    FilteredLabeledPredecessors,
153    LabeledPredecessors,
154    fn next(&mut self) -> Option<Self::Item> {
155        if !(self.node_filter)(self.node) {
156            return None;
157        }
158        for (src, label) in self.inner.by_ref() {
159            if (self.node_filter)(src) && (self.arc_filter)(src, self.node) {
160                return Some((src, label))
161            }
162        }
163        None
164    }
165}
166
167/// A view over [`SwhGraph`] and related traits, that filters out some nodes and arcs
168/// based on arbitrary closures.
169pub struct Subgraph<G: SwhGraph, NodeFilter: Fn(usize) -> bool, ArcFilter: Fn(usize, usize) -> bool>
170{
171    pub graph: G,
172    pub node_filter: NodeFilter,
173    pub arc_filter: ArcFilter,
174    pub num_nodes_by_type: Option<HashMap<NodeType, usize>>,
175    pub num_arcs_by_type: Option<HashMap<(NodeType, NodeType), usize>>,
176}
177
178impl<G: SwhGraph, NodeFilter: Fn(usize) -> bool> Subgraph<G, NodeFilter, fn(usize, usize) -> bool> {
179    /// Create a [Subgraph] keeping only nodes matching a given node filter function.
180    ///
181    /// Shorthand for `Subgraph { graph, node_filter, arc_filter: |_src, _dst| true }`
182    pub fn with_node_filter(
183        graph: G,
184        node_filter: NodeFilter,
185    ) -> Subgraph<G, NodeFilter, fn(usize, usize) -> bool> {
186        Subgraph {
187            graph,
188            node_filter,
189            arc_filter: |_src, _dst| true,
190            num_nodes_by_type: None,
191            num_arcs_by_type: None,
192        }
193    }
194}
195
196impl<G: SwhGraph, ArcFilter: Fn(usize, usize) -> bool> Subgraph<G, fn(usize) -> bool, ArcFilter> {
197    /// Create a [Subgraph] keeping only arcs matching a arc filter function.
198    ///
199    /// Shorthand for `Subgraph { graph, node_filter: |_node| true, arc_filter }`
200    pub fn with_arc_filter(
201        graph: G,
202        arc_filter: ArcFilter,
203    ) -> Subgraph<G, fn(usize) -> bool, ArcFilter> {
204        Subgraph {
205            graph,
206            node_filter: |_node| true,
207            arc_filter,
208            num_nodes_by_type: None,
209            num_arcs_by_type: None,
210        }
211    }
212}
213
214impl<G> Subgraph<G, fn(usize) -> bool, fn(usize, usize) -> bool>
215where
216    G: SwhGraphWithProperties + Clone,
217    <G as SwhGraphWithProperties>::Maps: properties::Maps,
218{
219    /// Create a [Subgraph] keeping only nodes matching a given node constraint.
220    #[allow(clippy::type_complexity)]
221    pub fn with_node_constraint(
222        graph: G,
223        node_constraint: NodeConstraint,
224    ) -> Subgraph<G, impl Fn(NodeId) -> bool, fn(usize, usize) -> bool> {
225        Subgraph {
226            graph: graph.clone(),
227            num_nodes_by_type: graph.num_nodes_by_type().ok().map(|counts| {
228                counts
229                    .into_iter()
230                    .filter(|&(type_, _count)| node_constraint.matches(type_))
231                    .collect()
232            }),
233            num_arcs_by_type: graph.num_arcs_by_type().ok().map(|counts| {
234                counts
235                    .into_iter()
236                    .filter(|&((src_type, dst_type), _count)| {
237                        node_constraint.matches(src_type) && node_constraint.matches(dst_type)
238                    })
239                    .collect()
240            }),
241            node_filter: move |node| node_constraint.matches(graph.properties().node_type(node)),
242            arc_filter: |_src, _dst| true,
243        }
244    }
245}
246
247impl<G: SwhGraph, NodeFilter: Fn(usize) -> bool, ArcFilter: Fn(usize, usize) -> bool> SwhGraph
248    for Subgraph<G, NodeFilter, ArcFilter>
249{
250    fn path(&self) -> &Path {
251        self.graph.path()
252    }
253    fn is_transposed(&self) -> bool {
254        self.graph.is_transposed()
255    }
256    // Note: this return the number or nodes in the original graph, before
257    // subgraph filtering.
258    fn num_nodes(&self) -> usize {
259        self.graph.num_nodes()
260    }
261    fn has_node(&self, node_id: NodeId) -> bool {
262        (self.node_filter)(node_id)
263    }
264    // Note: this return the number or arcs in the original graph, before
265    // subgraph filtering.
266    fn num_arcs(&self) -> u64 {
267        self.graph.num_arcs()
268    }
269    fn num_nodes_by_type(&self) -> Result<HashMap<NodeType, usize>> {
270        self.num_nodes_by_type.clone().ok_or(anyhow!(
271            "num_nodes_by_type is not supported by this Subgraph (if possible, use Subgraph::with_node_constraint to build it)"
272        ))
273    }
274    fn num_arcs_by_type(&self) -> Result<HashMap<(NodeType, NodeType), usize>> {
275        self.num_arcs_by_type.clone().ok_or(anyhow!(
276            "num_arcs_by_type is not supported by this Subgraph (if possible, use Subgraph::with_node_constraint to build it)"
277        ))
278    }
279    fn has_arc(&self, src_node_id: NodeId, dst_node_id: NodeId) -> bool {
280        (self.node_filter)(src_node_id)
281            && (self.node_filter)(dst_node_id)
282            && (self.arc_filter)(src_node_id, dst_node_id)
283            && self.graph.has_arc(src_node_id, dst_node_id)
284    }
285}
286
287impl<G: SwhForwardGraph, NodeFilter: Fn(usize) -> bool, ArcFilter: Fn(usize, usize) -> bool>
288    SwhForwardGraph for Subgraph<G, NodeFilter, ArcFilter>
289{
290    type Successors<'succ>
291        = FilteredSuccessors<
292        'succ,
293        <<G as SwhForwardGraph>::Successors<'succ> as IntoIterator>::IntoIter,
294        NodeFilter,
295        ArcFilter,
296    >
297    where
298        Self: 'succ;
299
300    fn successors(&self, node_id: NodeId) -> Self::Successors<'_> {
301        FilteredSuccessors {
302            inner: self.graph.successors(node_id).into_iter(),
303            node: node_id,
304            node_filter: &self.node_filter,
305            arc_filter: &self.arc_filter,
306        }
307    }
308    fn outdegree(&self, node_id: NodeId) -> usize {
309        self.successors(node_id).count()
310    }
311}
312
313impl<G: SwhBackwardGraph, NodeFilter: Fn(usize) -> bool, ArcFilter: Fn(usize, usize) -> bool>
314    SwhBackwardGraph for Subgraph<G, NodeFilter, ArcFilter>
315{
316    type Predecessors<'succ>
317        = FilteredPredecessors<
318        'succ,
319        <<G as SwhBackwardGraph>::Predecessors<'succ> as IntoIterator>::IntoIter,
320        NodeFilter,
321        ArcFilter,
322    >
323    where
324        Self: 'succ;
325
326    fn predecessors(&self, node_id: NodeId) -> Self::Predecessors<'_> {
327        FilteredPredecessors {
328            inner: self.graph.predecessors(node_id).into_iter(),
329            node: node_id,
330            node_filter: &self.node_filter,
331            arc_filter: &self.arc_filter,
332        }
333    }
334    fn indegree(&self, node_id: NodeId) -> usize {
335        self.predecessors(node_id).count()
336    }
337}
338
339impl<
340        G: SwhLabeledForwardGraph,
341        NodeFilter: Fn(usize) -> bool,
342        ArcFilter: Fn(usize, usize) -> bool,
343    > SwhLabeledForwardGraph for Subgraph<G, NodeFilter, ArcFilter>
344{
345    type LabeledArcs<'arc>
346        = <G as SwhLabeledForwardGraph>::LabeledArcs<'arc>
347    where
348        Self: 'arc;
349    type LabeledSuccessors<'node>
350        = FilteredLabeledSuccessors<
351        'node,
352        Self::LabeledArcs<'node>,
353        <<G as SwhLabeledForwardGraph>::LabeledSuccessors<'node> as IntoIterator>::IntoIter,
354        NodeFilter,
355        ArcFilter,
356    >
357    where
358        Self: 'node;
359
360    fn untyped_labeled_successors(&self, node_id: NodeId) -> Self::LabeledSuccessors<'_> {
361        FilteredLabeledSuccessors {
362            inner: self.graph.untyped_labeled_successors(node_id).into_iter(),
363            node: node_id,
364            node_filter: &self.node_filter,
365            arc_filter: &self.arc_filter,
366        }
367    }
368}
369
370impl<
371        G: SwhLabeledBackwardGraph,
372        NodeFilter: Fn(usize) -> bool,
373        ArcFilter: Fn(usize, usize) -> bool,
374    > SwhLabeledBackwardGraph for Subgraph<G, NodeFilter, ArcFilter>
375{
376    type LabeledArcs<'arc>
377        = <G as SwhLabeledBackwardGraph>::LabeledArcs<'arc>
378    where
379        Self: 'arc;
380    type LabeledPredecessors<'node>
381        = FilteredLabeledPredecessors<
382        'node,
383        Self::LabeledArcs<'node>,
384        <<G as SwhLabeledBackwardGraph>::LabeledPredecessors<'node> as IntoIterator>::IntoIter,
385        NodeFilter,
386        ArcFilter,
387    >
388    where
389        Self: 'node;
390
391    fn untyped_labeled_predecessors(&self, node_id: NodeId) -> Self::LabeledPredecessors<'_> {
392        FilteredLabeledPredecessors {
393            inner: self.graph.untyped_labeled_predecessors(node_id).into_iter(),
394            node: node_id,
395            node_filter: &self.node_filter,
396            arc_filter: &self.arc_filter,
397        }
398    }
399}
400
401impl<
402        G: SwhGraphWithProperties,
403        NodeFilter: Fn(usize) -> bool,
404        ArcFilter: Fn(usize, usize) -> bool,
405    > SwhGraphWithProperties for Subgraph<G, NodeFilter, ArcFilter>
406{
407    type Maps = <G as SwhGraphWithProperties>::Maps;
408    type Timestamps = <G as SwhGraphWithProperties>::Timestamps;
409    type Persons = <G as SwhGraphWithProperties>::Persons;
410    type Contents = <G as SwhGraphWithProperties>::Contents;
411    type Strings = <G as SwhGraphWithProperties>::Strings;
412    type LabelNames = <G as SwhGraphWithProperties>::LabelNames;
413
414    fn properties(
415        &self,
416    ) -> &properties::SwhGraphProperties<
417        Self::Maps,
418        Self::Timestamps,
419        Self::Persons,
420        Self::Contents,
421        Self::Strings,
422        Self::LabelNames,
423    > {
424        self.graph.properties()
425    }
426}