Skip to main content

hfx_core/
graph.rs

1//! Upstream adjacency graph types.
2
3use std::collections::HashMap;
4
5use crate::id::UnitId;
6use crate::level::Level;
7
8/// A single row in the upstream adjacency graph.
9///
10/// Represents one drainage unit and the set of same-level units directly upstream of it.
11#[derive(Debug, Clone, PartialEq)]
12pub struct AdjacencyRow {
13    id: UnitId,
14    level: Level,
15    upstream_ids: Vec<UnitId>,
16}
17
18impl AdjacencyRow {
19    /// Construct an [`AdjacencyRow`] from a unit ID, level, and upstream neighbours.
20    pub fn new(id: UnitId, level: Level, upstream_ids: Vec<UnitId>) -> Self {
21        Self {
22            id,
23            level,
24            upstream_ids,
25        }
26    }
27
28    /// Return the unit ID for this row.
29    pub fn id(&self) -> UnitId {
30        self.id
31    }
32
33    /// Return the dataset-local level for this row.
34    pub fn level(&self) -> Level {
35        self.level
36    }
37
38    /// Return the slice of upstream unit IDs.
39    pub fn upstream_ids(&self) -> &[UnitId] {
40        &self.upstream_ids
41    }
42
43    /// Return `true` if this unit has no upstream neighbours.
44    pub fn is_headwater(&self) -> bool {
45        self.upstream_ids.is_empty()
46    }
47}
48
49/// Errors from constructing a [`DrainageGraph`].
50#[derive(Debug, thiserror::Error)]
51pub enum GraphError {
52    /// Returned when the graph contains zero rows.
53    #[error("drainage graph must contain at least one atom")]
54    EmptyGraph,
55
56    /// Returned when the same unit ID appears in more than one row.
57    #[error("duplicate unit id {id} at row indices {first} and {second}")]
58    DuplicateUnitId {
59        /// The raw i64 value of the duplicated ID.
60        id: i64,
61        /// Index of the first occurrence.
62        first: usize,
63        /// Index of the second (duplicate) occurrence.
64        second: usize,
65    },
66}
67
68/// The complete upstream adjacency graph over drainage units.
69///
70/// This is the canonical in-memory representation of `graph.parquet` for
71/// validation and construction purposes. The HashMap-based index provides
72/// O(1) lookup by unit ID.
73///
74/// **Note for engine implementors:** this representation is optimized for
75/// validation and random access, not traversal. A delineation engine will
76/// typically convert to CSR (compressed sparse row) or another
77/// traversal-optimized layout at load time, as permitted by the HFX spec.
78#[derive(Debug, Clone, PartialEq)]
79pub struct DrainageGraph {
80    rows: Vec<AdjacencyRow>,
81    index: HashMap<UnitId, usize>,
82}
83
84impl DrainageGraph {
85    /// Construct a [`DrainageGraph`] from a vector of [`AdjacencyRow`]s.
86    ///
87    /// Builds the internal O(1) lookup index as part of construction.
88    ///
89    /// # Errors
90    ///
91    /// | Condition | Error variant |
92    /// |-----------|---------------|
93    /// | `rows` is empty | [`GraphError::EmptyGraph`] |
94    /// | The same [`UnitId`] appears in two or more rows | [`GraphError::DuplicateUnitId`] |
95    pub fn new(rows: Vec<AdjacencyRow>) -> Result<Self, GraphError> {
96        if rows.is_empty() {
97            return Err(GraphError::EmptyGraph);
98        }
99
100        let mut index = HashMap::with_capacity(rows.len());
101        for (i, row) in rows.iter().enumerate() {
102            if let Some(&first) = index.get(&row.id) {
103                return Err(GraphError::DuplicateUnitId {
104                    id: row.id.get(),
105                    first,
106                    second: i,
107                });
108            }
109            index.insert(row.id, i);
110        }
111
112        Ok(Self { rows, index })
113    }
114
115    /// Return all rows in the graph.
116    pub fn rows(&self) -> &[AdjacencyRow] {
117        &self.rows
118    }
119
120    /// Return the number of rows in the graph.
121    pub fn len(&self) -> usize {
122        self.rows.len()
123    }
124
125    /// Return `true` if the graph contains no rows.
126    pub fn is_empty(&self) -> bool {
127        self.rows.is_empty()
128    }
129
130    /// Look up a row by [`UnitId`] in O(1) time.
131    ///
132    /// Returns `None` if no row with the given ID exists in the graph.
133    pub fn get(&self, id: UnitId) -> Option<&AdjacencyRow> {
134        self.index.get(&id).map(|&i| &self.rows[i])
135    }
136}
137
138#[cfg(test)]
139mod tests {
140    use super::*;
141
142    fn test_unit_id(raw: i64) -> UnitId {
143        UnitId::new(raw).unwrap()
144    }
145
146    fn test_level(raw: i16) -> Level {
147        Level::new(raw).unwrap()
148    }
149
150    fn headwater_row(id: i64) -> AdjacencyRow {
151        AdjacencyRow::new(test_unit_id(id), test_level(0), vec![])
152    }
153
154    fn interior_row(id: i64, upstream: &[i64]) -> AdjacencyRow {
155        let upstream_ids = upstream.iter().map(|&r| test_unit_id(r)).collect();
156        AdjacencyRow::new(test_unit_id(id), test_level(0), upstream_ids)
157    }
158
159    #[test]
160    fn single_headwater_row_has_len_one_and_is_not_empty() {
161        let graph = DrainageGraph::new(vec![headwater_row(1)]).unwrap();
162        assert_eq!(graph.len(), 1);
163        assert!(!graph.is_empty());
164    }
165
166    #[test]
167    fn empty_rows_fails_with_empty_graph_error() {
168        let err = DrainageGraph::new(vec![]).unwrap_err();
169        assert!(matches!(err, GraphError::EmptyGraph));
170    }
171
172    #[test]
173    fn duplicate_unit_id_fails_with_duplicate_error() {
174        let rows = vec![headwater_row(5), headwater_row(5)];
175        let err = DrainageGraph::new(rows).unwrap_err();
176        assert!(matches!(
177            err,
178            GraphError::DuplicateUnitId {
179                id: 5,
180                first: 0,
181                second: 1
182            }
183        ));
184    }
185
186    #[test]
187    fn get_returns_correct_row() {
188        let row = interior_row(10, &[11, 12]);
189        let graph =
190            DrainageGraph::new(vec![headwater_row(11), headwater_row(12), row.clone()]).unwrap();
191
192        let found = graph.get(test_unit_id(10)).unwrap();
193        assert_eq!(found.id(), test_unit_id(10));
194        assert_eq!(found.level(), test_level(0));
195        assert_eq!(found.upstream_ids().len(), 2);
196    }
197
198    #[test]
199    fn get_with_nonexistent_id_returns_none() {
200        let graph = DrainageGraph::new(vec![headwater_row(1)]).unwrap();
201        assert!(graph.get(test_unit_id(999)).is_none());
202    }
203
204    #[test]
205    fn is_headwater_true_for_empty_upstream_ids() {
206        let row = headwater_row(1);
207        assert!(row.is_headwater());
208    }
209
210    #[test]
211    fn is_headwater_false_for_nonempty_upstream_ids() {
212        let row = interior_row(10, &[1, 2]);
213        assert!(!row.is_headwater());
214    }
215}