Skip to main content

haystack_core/graph/
entity_graph.rs

1// EntityGraph — in-memory entity store with bitmap indexing and ref adjacency.
2
3use std::collections::HashMap;
4
5use parking_lot::Mutex;
6use rayon::prelude::*;
7
8use crate::data::{HCol, HDict, HGrid};
9use crate::filter::{matches_with_ns, parse_filter};
10use crate::kinds::{HRef, Kind};
11use crate::ontology::{DefNamespace, ValidationIssue};
12
13use super::adjacency::RefAdjacency;
14use super::bitmap::TagBitmapIndex;
15use super::changelog::{DiffOp, GraphDiff};
16use super::columnar::ColumnarStore;
17use super::csr::CsrAdjacency;
18use super::query_planner;
19use super::value_index::ValueIndex;
20
21/// Errors returned by EntityGraph operations.
22#[derive(Debug, thiserror::Error)]
23pub enum GraphError {
24    #[error("entity missing 'id' tag")]
25    MissingId,
26    #[error("entity id must be a Ref")]
27    InvalidId,
28    #[error("entity already exists: {0}")]
29    DuplicateRef(String),
30    #[error("entity not found: {0}")]
31    NotFound(String),
32    #[error("filter error: {0}")]
33    Filter(String),
34}
35
36/// Core entity graph with bitmap tag indexing and bidirectional ref adjacency.
37pub struct EntityGraph {
38    /// ref_val -> entity dict
39    entities: HashMap<String, HDict>,
40    /// ref_val -> internal numeric id (for bitmap indexing)
41    id_map: HashMap<String, usize>,
42    /// internal numeric id -> ref_val
43    reverse_id: HashMap<usize, String>,
44    /// Next internal id to assign.
45    next_id: usize,
46    /// Tag bitmap index for fast has/missing queries.
47    tag_index: TagBitmapIndex,
48    /// Bidirectional ref adjacency for graph traversal.
49    adjacency: RefAdjacency,
50    /// Optional ontology namespace for spec-aware operations.
51    namespace: Option<DefNamespace>,
52    /// Monotonic version counter, incremented on every mutation.
53    version: u64,
54    /// Ordered list of mutations.
55    changelog: std::collections::VecDeque<GraphDiff>,
56    /// LRU query cache: (filter, version) → matching ref_vals.
57    /// Uses Mutex for interior mutability since read_all takes &self.
58    query_cache: Mutex<QueryCache>,
59    /// Optional B-Tree value indexes for comparison-based filter acceleration.
60    value_index: ValueIndex,
61    /// CSR snapshot of adjacency for cache-friendly traversal.
62    /// Rebuilt lazily via `rebuild_csr()`. `None` until first build.
63    csr: Option<CsrAdjacency>,
64    /// Version at which the CSR was last rebuilt (for staleness detection).
65    csr_version: u64,
66    /// Columnar storage for cache-friendly single-tag scans.
67    columnar: ColumnarStore,
68}
69
70/// Simple fixed-capacity LRU cache for filter query results.
71struct QueryCache {
72    entries: Vec<QueryCacheEntry>,
73    capacity: usize,
74}
75
76struct QueryCacheEntry {
77    filter: String,
78    version: u64,
79    ref_vals: Vec<String>,
80}
81
82impl QueryCache {
83    fn new(capacity: usize) -> Self {
84        Self {
85            entries: Vec::with_capacity(capacity),
86            capacity,
87        }
88    }
89
90    fn get(&mut self, filter: &str, version: u64) -> Option<&[String]> {
91        let pos = self
92            .entries
93            .iter()
94            .position(|e| e.version == version && e.filter == filter)?;
95        // Move to front (most recently used)
96        if pos > 0 {
97            let entry = self.entries.remove(pos);
98            self.entries.insert(0, entry);
99        }
100        Some(&self.entries[0].ref_vals)
101    }
102
103    fn insert(&mut self, filter: String, version: u64, ref_vals: Vec<String>) {
104        // Evict oldest if at capacity
105        if self.entries.len() >= self.capacity {
106            self.entries.pop();
107        }
108        self.entries.insert(
109            0,
110            QueryCacheEntry {
111                filter,
112                version,
113                ref_vals,
114            },
115        );
116    }
117}
118
119const MAX_CHANGELOG: usize = 10_000;
120const QUERY_CACHE_CAPACITY: usize = 256;
121
122impl EntityGraph {
123    /// Create an empty entity graph.
124    pub fn new() -> Self {
125        Self {
126            entities: HashMap::new(),
127            id_map: HashMap::new(),
128            reverse_id: HashMap::new(),
129            next_id: 0,
130            tag_index: TagBitmapIndex::new(),
131            adjacency: RefAdjacency::new(),
132            namespace: None,
133            version: 0,
134            changelog: std::collections::VecDeque::new(),
135            query_cache: Mutex::new(QueryCache::new(QUERY_CACHE_CAPACITY)),
136            value_index: ValueIndex::new(),
137            csr: None,
138            csr_version: 0,
139            columnar: ColumnarStore::new(),
140        }
141    }
142
143    /// Create an entity graph with an ontology namespace.
144    pub fn with_namespace(ns: DefNamespace) -> Self {
145        Self {
146            namespace: Some(ns),
147            ..Self::new()
148        }
149    }
150
151    // ── Value Indexes ──
152
153    /// Register a field for B-Tree value indexing. Enables O(log N) range
154    /// queries (e.g. `temp > 72`) for this field. Must be called before
155    /// entities are added, or followed by `rebuild_value_index` for existing data.
156    pub fn index_field(&mut self, field: &str) {
157        self.value_index.index_field(field);
158    }
159
160    /// Rebuild the value index for all indexed fields from the current entities.
161    pub fn rebuild_value_index(&mut self) {
162        self.value_index.clear();
163        for (ref_val, entity) in &self.entities {
164            if let Some(&eid) = self.id_map.get(ref_val.as_str()) {
165                for (name, val) in entity.iter() {
166                    if self.value_index.has_index(name) {
167                        self.value_index.add(eid, name, val);
168                    }
169                }
170            }
171        }
172    }
173
174    /// Returns a reference to the value index (for use by the query planner).
175    pub fn value_index(&self) -> &ValueIndex {
176        &self.value_index
177    }
178
179    // ── Columnar Storage ──
180
181    /// Register a tag for columnar storage. Enables cache-friendly sequential
182    /// scans for this tag. Must be called before entities are added, or followed
183    /// by `rebuild_columnar` for existing data.
184    pub fn track_column(&mut self, tag: &str) {
185        self.columnar.track_tag(tag);
186    }
187
188    /// Rebuild all tracked columnar data from current entities.
189    pub fn rebuild_columnar(&mut self) {
190        self.columnar.clear();
191        self.columnar.ensure_capacity(self.next_id);
192        for (ref_val, entity) in &self.entities {
193            if let Some(&eid) = self.id_map.get(ref_val.as_str()) {
194                for (name, val) in entity.iter() {
195                    if self.columnar.is_tracked(name) {
196                        self.columnar.set(eid, name, val);
197                    }
198                }
199            }
200        }
201    }
202
203    /// Returns a reference to the columnar store for direct column scans.
204    pub fn columnar(&self) -> &ColumnarStore {
205        &self.columnar
206    }
207
208    // ── CRUD ──
209
210    /// Add an entity to the graph.
211    ///
212    /// The entity must have an `id` tag that is a `Ref`. Returns the ref
213    /// value string on success.
214    pub fn add(&mut self, entity: HDict) -> Result<String, GraphError> {
215        let ref_val = extract_ref_val(&entity)?;
216
217        if self.entities.contains_key(&ref_val) {
218            return Err(GraphError::DuplicateRef(ref_val));
219        }
220
221        let eid = self.next_id;
222        self.next_id = self.next_id.checked_add(1).ok_or(GraphError::InvalidId)?;
223
224        self.id_map.insert(ref_val.clone(), eid);
225        self.reverse_id.insert(eid, ref_val.clone());
226
227        // Index before inserting (borrows entity immutably, self mutably).
228        self.index_tags(eid, &entity);
229        self.index_refs(eid, &entity);
230
231        // Clone for the changelog, then move the entity into the map.
232        let entity_for_log = entity.clone();
233        self.entities.insert(ref_val.clone(), entity);
234
235        self.version += 1;
236        self.csr = None; // Invalidate CSR snapshot on mutation.
237        self.push_changelog(GraphDiff {
238            version: self.version,
239            op: DiffOp::Add,
240            ref_val: ref_val.clone(),
241            old: None,
242            new: Some(entity_for_log),
243        });
244
245        Ok(ref_val)
246    }
247
248    /// Get a reference to an entity by ref value.
249    pub fn get(&self, ref_val: &str) -> Option<&HDict> {
250        self.entities.get(ref_val)
251    }
252
253    /// Update an existing entity by merging `changes` into it.
254    ///
255    /// Tags in `changes` overwrite existing tags; `Kind::Remove` tags are
256    /// deleted. The `id` tag cannot be changed.
257    pub fn update(&mut self, ref_val: &str, changes: HDict) -> Result<(), GraphError> {
258        let eid = *self
259            .id_map
260            .get(ref_val)
261            .ok_or_else(|| GraphError::NotFound(ref_val.to_string()))?;
262
263        // Remove the old entity from the map (avoids an extra clone).
264        let mut old_entity = self
265            .entities
266            .remove(ref_val)
267            .ok_or_else(|| GraphError::NotFound(ref_val.to_string()))?;
268
269        // Remove old indexing.
270        self.remove_indexing(eid, &old_entity);
271
272        // Snapshot old state for changelog, then merge in-place.
273        let old_snapshot = old_entity.clone();
274        old_entity.merge(&changes);
275
276        // Re-index before re-inserting (entity is a local value, no borrow conflict).
277        self.index_tags(eid, &old_entity);
278        self.index_refs(eid, &old_entity);
279
280        // Clone for the changelog, then move the updated entity into the map.
281        let updated_for_log = old_entity.clone();
282        self.entities.insert(ref_val.to_string(), old_entity);
283
284        self.version += 1;
285        self.csr = None; // Invalidate CSR snapshot on mutation.
286        self.push_changelog(GraphDiff {
287            version: self.version,
288            op: DiffOp::Update,
289            ref_val: ref_val.to_string(),
290            old: Some(old_snapshot),
291            new: Some(updated_for_log),
292        });
293
294        Ok(())
295    }
296
297    /// Remove an entity from the graph. Returns the removed entity.
298    pub fn remove(&mut self, ref_val: &str) -> Result<HDict, GraphError> {
299        let eid = self
300            .id_map
301            .remove(ref_val)
302            .ok_or_else(|| GraphError::NotFound(ref_val.to_string()))?;
303
304        self.reverse_id.remove(&eid);
305
306        let entity = self
307            .entities
308            .remove(ref_val)
309            .ok_or_else(|| GraphError::NotFound(ref_val.to_string()))?;
310
311        self.remove_indexing(eid, &entity);
312
313        self.version += 1;
314        self.csr = None; // Invalidate CSR snapshot on mutation.
315        self.push_changelog(GraphDiff {
316            version: self.version,
317            op: DiffOp::Remove,
318            ref_val: ref_val.to_string(),
319            old: Some(entity.clone()),
320            new: None,
321        });
322
323        Ok(entity)
324    }
325
326    // ── Query ──
327
328    /// Run a filter expression and return matching entities as a grid.
329    pub fn read(&self, filter_expr: &str, limit: usize) -> Result<HGrid, GraphError> {
330        let results = self.read_all(filter_expr, limit)?;
331
332        if results.is_empty() {
333            return Ok(HGrid::new());
334        }
335
336        // Collect all unique column names.
337        let mut col_set: Vec<String> = Vec::new();
338        let mut seen = std::collections::HashSet::new();
339        for entity in &results {
340            for name in entity.tag_names() {
341                if seen.insert(name.to_string()) {
342                    col_set.push(name.to_string());
343                }
344            }
345        }
346        col_set.sort();
347        let cols: Vec<HCol> = col_set.iter().map(|n| HCol::new(n.as_str())).collect();
348        let rows: Vec<HDict> = results.into_iter().cloned().collect();
349
350        Ok(HGrid::from_parts(HDict::new(), cols, rows))
351    }
352
353    /// Run a filter expression and return matching entities as references.
354    pub fn read_all(&self, filter_expr: &str, limit: usize) -> Result<Vec<&HDict>, GraphError> {
355        let effective_limit = if limit == 0 { usize::MAX } else { limit };
356
357        // Check query cache (version-keyed, so mutations auto-invalidate).
358        {
359            let mut cache = self.query_cache.lock();
360            if let Some(cached_refs) = cache.get(filter_expr, self.version) {
361                let mut results = Vec::new();
362                for rv in cached_refs {
363                    if results.len() >= effective_limit {
364                        break;
365                    }
366                    if let Some(entity) = self.entities.get(rv) {
367                        results.push(entity);
368                    }
369                }
370                return Ok(results);
371            }
372        }
373
374        let ast = parse_filter(filter_expr).map_err(|e| GraphError::Filter(e.to_string()))?;
375
376        // Phase 1: bitmap acceleration (with value index enhancement).
377        let max_id = self.next_id;
378        let candidates = query_planner::bitmap_candidates_with_values(
379            &ast,
380            &self.tag_index,
381            &self.value_index,
382            max_id,
383        );
384
385        // Phase 2: full filter evaluation.
386        let resolver = |r: &HRef| -> Option<&HDict> { self.entities.get(&r.val) };
387        let ns = self.namespace.as_ref();
388
389        /// Threshold for parallel evaluation — below this, sequential is faster.
390        const PARALLEL_THRESHOLD: usize = 500;
391
392        let mut results: Vec<&HDict>;
393
394        if let Some(ref bitmap) = candidates {
395            let candidate_ids: Vec<usize> = TagBitmapIndex::iter_set_bits(bitmap).collect();
396
397            if candidate_ids.len() >= PARALLEL_THRESHOLD && effective_limit == usize::MAX {
398                // Parallel path for large, unlimited queries.
399                results = candidate_ids
400                    .par_iter()
401                    .filter_map(|&eid| {
402                        let ref_val = self.reverse_id.get(&eid)?;
403                        let entity = self.entities.get(ref_val)?;
404                        if matches_with_ns(&ast, entity, Some(&resolver), ns) {
405                            Some(entity)
406                        } else {
407                            None
408                        }
409                    })
410                    .collect();
411            } else {
412                // Sequential path for small sets or limited queries.
413                results = Vec::new();
414                for eid in TagBitmapIndex::iter_set_bits(bitmap) {
415                    if results.len() >= effective_limit {
416                        break;
417                    }
418                    if let Some(ref_val) = self.reverse_id.get(&eid)
419                        && let Some(entity) = self.entities.get(ref_val)
420                        && matches_with_ns(&ast, entity, Some(&resolver), ns)
421                    {
422                        results.push(entity);
423                    }
424                }
425            }
426        } else {
427            let entity_count = self.entities.len();
428
429            if entity_count >= PARALLEL_THRESHOLD && effective_limit == usize::MAX {
430                // Parallel full scan for large, unlimited queries.
431                results = self
432                    .entities
433                    .par_iter()
434                    .filter_map(|(_, entity)| {
435                        if matches_with_ns(&ast, entity, Some(&resolver), ns) {
436                            Some(entity)
437                        } else {
438                            None
439                        }
440                    })
441                    .collect();
442            } else {
443                // Sequential full scan.
444                results = Vec::new();
445                for entity in self.entities.values() {
446                    if results.len() >= effective_limit {
447                        break;
448                    }
449                    if matches_with_ns(&ast, entity, Some(&resolver), ns) {
450                        results.push(entity);
451                    }
452                }
453            }
454        }
455
456        // Apply limit to parallel results.
457        if results.len() > effective_limit {
458            results.truncate(effective_limit);
459        }
460
461        // Populate cache with result ref_vals (only for unlimited queries to
462        // avoid caching partial results that depend on limit).
463        if limit == 0 {
464            let ref_vals: Vec<String> = results
465                .iter()
466                .filter_map(|e| {
467                    e.get("id").and_then(|k| match k {
468                        Kind::Ref(r) => Some(r.val.clone()),
469                        _ => None,
470                    })
471                })
472                .collect();
473            let mut cache = self.query_cache.lock();
474            cache.insert(filter_expr.to_string(), self.version, ref_vals);
475        }
476
477        Ok(results)
478    }
479
480    // ── Ref traversal ──
481
482    /// Get ref values that the given entity points to.
483    pub fn refs_from(&self, ref_val: &str, ref_type: Option<&str>) -> Vec<String> {
484        match self.id_map.get(ref_val) {
485            Some(&eid) => {
486                if let Some(csr) = &self.csr {
487                    csr.targets_from(eid, ref_type)
488                } else {
489                    self.adjacency.targets_from(eid, ref_type)
490                }
491            }
492            None => Vec::new(),
493        }
494    }
495
496    /// Get ref values of entities that point to the given entity.
497    pub fn refs_to(&self, ref_val: &str, ref_type: Option<&str>) -> Vec<String> {
498        if let Some(csr) = &self.csr {
499            csr.sources_to(ref_val, ref_type)
500                .iter()
501                .filter_map(|eid| self.reverse_id.get(eid).cloned())
502                .collect()
503        } else {
504            self.adjacency
505                .sources_to(ref_val, ref_type)
506                .iter()
507                .filter_map(|eid| self.reverse_id.get(eid).cloned())
508                .collect()
509        }
510    }
511
512    /// Rebuild the CSR snapshot from the current adjacency.
513    /// Should be called after a batch of mutations (e.g., import, sync).
514    pub fn rebuild_csr(&mut self) {
515        let max_id = if self.next_id > 0 { self.next_id } else { 0 };
516        self.csr = Some(CsrAdjacency::from_ref_adjacency(&self.adjacency, max_id));
517        self.csr_version = self.version;
518    }
519
520    /// Returns true if the CSR snapshot is stale (version mismatch).
521    pub fn csr_is_stale(&self) -> bool {
522        match &self.csr {
523            Some(_) => self.csr_version != self.version,
524            None => true,
525        }
526    }
527
528    // ── Graph traversal ──
529
530    /// Return all edges in the graph as `(source_ref, ref_tag, target_ref)` tuples.
531    pub fn all_edges(&self) -> Vec<(String, String, String)> {
532        let mut edges = Vec::new();
533        for (&eid, ref_val) in &self.reverse_id {
534            if let Some(fwd) = self.adjacency.forward_raw().get(&eid) {
535                for (ref_tag, target) in fwd {
536                    edges.push((ref_val.clone(), ref_tag.clone(), target.clone()));
537                }
538            }
539        }
540        edges
541    }
542
543    /// BFS neighborhood: return entities and edges within `hops` of `ref_val`.
544    ///
545    /// `ref_types` optionally restricts which ref tags are traversed.
546    /// Returns `(entities, edges)` where edges are `(source, ref_tag, target)`.
547    pub fn neighbors(
548        &self,
549        ref_val: &str,
550        hops: usize,
551        ref_types: Option<&[&str]>,
552    ) -> (Vec<&HDict>, Vec<(String, String, String)>) {
553        use std::collections::{HashSet, VecDeque};
554
555        let mut visited: HashSet<String> = HashSet::new();
556        let mut queue: VecDeque<(String, usize)> = VecDeque::new();
557        let mut result_entities: Vec<&HDict> = Vec::new();
558        let mut result_edges: Vec<(String, String, String)> = Vec::new();
559
560        visited.insert(ref_val.to_string());
561        queue.push_back((ref_val.to_string(), 0));
562
563        if let Some(entity) = self.entities.get(ref_val) {
564            result_entities.push(entity);
565        }
566
567        while let Some((current, depth)) = queue.pop_front() {
568            if depth >= hops {
569                continue;
570            }
571            // Traverse forward edges
572            if let Some(&eid) = self.id_map.get(&current) {
573                if let Some(fwd) = self.adjacency.forward_raw().get(&eid) {
574                    for (ref_tag, target) in fwd {
575                        if let Some(types) = ref_types {
576                            if !types.iter().any(|t| t == ref_tag) {
577                                continue;
578                            }
579                        }
580                        result_edges.push((current.clone(), ref_tag.clone(), target.clone()));
581                        if visited.insert(target.clone()) {
582                            if let Some(entity) = self.entities.get(target.as_str()) {
583                                result_entities.push(entity);
584                            }
585                            queue.push_back((target.clone(), depth + 1));
586                        }
587                    }
588                }
589            }
590            // Traverse reverse edges
591            if let Some(rev) = self.adjacency.reverse_raw().get(&current) {
592                for (ref_tag, source_eid) in rev {
593                    if let Some(types) = ref_types {
594                        if !types.iter().any(|t| t == ref_tag) {
595                            continue;
596                        }
597                    }
598                    if let Some(source_ref) = self.reverse_id.get(source_eid) {
599                        result_edges.push((source_ref.clone(), ref_tag.clone(), current.clone()));
600                        if visited.insert(source_ref.clone()) {
601                            if let Some(entity) = self.entities.get(source_ref.as_str()) {
602                                result_entities.push(entity);
603                            }
604                            queue.push_back((source_ref.clone(), depth + 1));
605                        }
606                    }
607                }
608            }
609        }
610
611        result_entities.sort_by(|a, b| {
612            let a_id = a.id().map(|r| r.val.as_str()).unwrap_or("");
613            let b_id = b.id().map(|r| r.val.as_str()).unwrap_or("");
614            a_id.cmp(b_id)
615        });
616        result_edges.sort();
617        // Deduplicate edges (reverse traversal can produce duplicates)
618        result_edges.dedup();
619
620        (result_entities, result_edges)
621    }
622
623    /// BFS shortest path from `from` to `to`. Returns ordered ref_vals, or
624    /// empty vec if no path exists.
625    pub fn shortest_path(&self, from: &str, to: &str) -> Vec<String> {
626        use std::collections::{HashMap as StdHashMap, VecDeque};
627
628        if from == to {
629            return vec![from.to_string()];
630        }
631        if !self.entities.contains_key(from) || !self.entities.contains_key(to) {
632            return Vec::new();
633        }
634
635        let mut visited: StdHashMap<String, String> = StdHashMap::new(); // child -> parent
636        let mut queue: VecDeque<String> = VecDeque::new();
637        visited.insert(from.to_string(), String::new());
638        queue.push_back(from.to_string());
639
640        while let Some(current) = queue.pop_front() {
641            // Forward edges
642            if let Some(&eid) = self.id_map.get(&current) {
643                if let Some(fwd) = self.adjacency.forward_raw().get(&eid) {
644                    for (_, target) in fwd {
645                        if !visited.contains_key(target) {
646                            visited.insert(target.clone(), current.clone());
647                            if target == to {
648                                return Self::reconstruct_path(&visited, to);
649                            }
650                            queue.push_back(target.clone());
651                        }
652                    }
653                }
654            }
655            // Reverse edges
656            if let Some(rev) = self.adjacency.reverse_raw().get(&current) {
657                for (_, source_eid) in rev {
658                    if let Some(source_ref) = self.reverse_id.get(source_eid) {
659                        if !visited.contains_key(source_ref) {
660                            visited.insert(source_ref.clone(), current.clone());
661                            if source_ref == to {
662                                return Self::reconstruct_path(&visited, to);
663                            }
664                            queue.push_back(source_ref.clone());
665                        }
666                    }
667                }
668            }
669        }
670
671        Vec::new() // No path found
672    }
673
674    /// Reconstruct path from BFS parent map.
675    fn reconstruct_path(
676        parents: &std::collections::HashMap<String, String>,
677        to: &str,
678    ) -> Vec<String> {
679        let mut path = vec![to.to_string()];
680        let mut current = to.to_string();
681        while let Some(parent) = parents.get(&current) {
682            if parent.is_empty() {
683                break;
684            }
685            path.push(parent.clone());
686            current = parent.clone();
687        }
688        path.reverse();
689        path
690    }
691
692    /// Return the subtree rooted at `root` up to `max_depth` levels.
693    ///
694    /// Walks reverse refs (children referencing parent). Returns entities
695    /// paired with their depth from root. Root is depth 0.
696    pub fn subtree(&self, root: &str, max_depth: usize) -> Vec<(&HDict, usize)> {
697        use std::collections::{HashSet, VecDeque};
698
699        let mut visited: HashSet<String> = HashSet::new();
700        let mut queue: VecDeque<(String, usize)> = VecDeque::new();
701        let mut results: Vec<(&HDict, usize)> = Vec::new();
702
703        visited.insert(root.to_string());
704        queue.push_back((root.to_string(), 0));
705
706        if let Some(entity) = self.entities.get(root) {
707            results.push((entity, 0));
708        } else {
709            return Vec::new();
710        }
711
712        while let Some((current, depth)) = queue.pop_front() {
713            if depth >= max_depth {
714                continue;
715            }
716            // Children = entities that reference current (reverse refs)
717            let child_refs = self.refs_to(&current, None);
718            for child_ref in child_refs {
719                if visited.insert(child_ref.clone()) {
720                    if let Some(entity) = self.entities.get(&child_ref) {
721                        results.push((entity, depth + 1));
722                        queue.push_back((child_ref, depth + 1));
723                    }
724                }
725            }
726        }
727
728        results
729    }
730
731    // ── Spec-aware ──
732
733    /// Find all entities that structurally fit a spec/type name.
734    ///
735    /// Requires a namespace to be set. Returns empty if no namespace.
736    pub fn entities_fitting(&self, spec_name: &str) -> Vec<&HDict> {
737        match &self.namespace {
738            Some(ns) => self
739                .entities
740                .values()
741                .filter(|e| ns.fits(e, spec_name))
742                .collect(),
743            None => Vec::new(),
744        }
745    }
746
747    /// Validate all entities against the namespace and check for dangling refs.
748    ///
749    /// Returns empty if no namespace is set and no dangling refs exist.
750    pub fn validate(&self) -> Vec<ValidationIssue> {
751        let mut issues: Vec<ValidationIssue> = match &self.namespace {
752            Some(ns) => self
753                .entities
754                .values()
755                .flat_map(|e| ns.validate_entity(e))
756                .collect(),
757            None => Vec::new(),
758        };
759
760        // Check for dangling refs: Ref values (except `id`) that point to
761        // entities not present in the graph.
762        for entity in self.entities.values() {
763            let entity_ref = entity.id().map(|r| r.val.as_str());
764            for (name, val) in entity.iter() {
765                if name == "id" {
766                    continue;
767                }
768                if let Kind::Ref(r) = val
769                    && !self.entities.contains_key(&r.val)
770                {
771                    issues.push(ValidationIssue {
772                        entity: entity_ref.map(|s| s.to_string()),
773                        issue_type: "dangling_ref".to_string(),
774                        detail: format!(
775                            "tag '{}' references '{}' which does not exist in the graph",
776                            name, r.val
777                        ),
778                    });
779                }
780            }
781        }
782
783        issues
784    }
785
786    // ── Serialization ──
787
788    /// Convert matching entities to a grid.
789    ///
790    /// If `filter_expr` is empty, exports all entities.
791    /// Otherwise, delegates to `read`.
792    pub fn to_grid(&self, filter_expr: &str) -> Result<HGrid, GraphError> {
793        if filter_expr.is_empty() {
794            let entities: Vec<&HDict> = self.entities.values().collect();
795            return Ok(Self::entities_to_grid(&entities));
796        }
797        self.read(filter_expr, 0)
798    }
799
800    /// Build a grid from a slice of entity references.
801    fn entities_to_grid(entities: &[&HDict]) -> HGrid {
802        if entities.is_empty() {
803            return HGrid::new();
804        }
805
806        let mut col_set: Vec<String> = Vec::new();
807        let mut seen = std::collections::HashSet::new();
808        for entity in entities {
809            for name in entity.tag_names() {
810                if seen.insert(name.to_string()) {
811                    col_set.push(name.to_string());
812                }
813            }
814        }
815        col_set.sort();
816        let cols: Vec<HCol> = col_set.iter().map(|n| HCol::new(n.as_str())).collect();
817        let rows: Vec<HDict> = entities.iter().map(|e| (*e).clone()).collect();
818
819        HGrid::from_parts(HDict::new(), cols, rows)
820    }
821
822    /// Build an EntityGraph from a grid.
823    ///
824    /// Rows without a valid `id` Ref tag are silently skipped.
825    pub fn from_grid(grid: &HGrid, namespace: Option<DefNamespace>) -> Result<Self, GraphError> {
826        let mut graph = match namespace {
827            Some(ns) => Self::with_namespace(ns),
828            None => Self::new(),
829        };
830        for row in &grid.rows {
831            if row.id().is_some() {
832                graph.add(row.clone())?;
833            }
834        }
835        // Bulk import: build CSR once after all adds.
836        graph.rebuild_csr();
837        Ok(graph)
838    }
839
840    // ── Change tracking ──
841
842    /// Get changelog entries since a given version.
843    pub fn changes_since(&self, version: u64) -> Vec<&GraphDiff> {
844        let target = version + 1;
845        // VecDeque may not be contiguous, so collect matching entries.
846        self.changelog
847            .iter()
848            .filter(|d| d.version >= target)
849            .collect()
850    }
851
852    /// Current graph version (monotonically increasing).
853    pub fn version(&self) -> u64 {
854        self.version
855    }
856
857    // ── Container ──
858
859    /// Number of entities in the graph.
860    pub fn len(&self) -> usize {
861        self.entities.len()
862    }
863
864    /// Returns `true` if the graph has no entities.
865    pub fn is_empty(&self) -> bool {
866        self.entities.is_empty()
867    }
868
869    /// Returns `true` if an entity with the given ref value exists.
870    pub fn contains(&self, ref_val: &str) -> bool {
871        self.entities.contains_key(ref_val)
872    }
873
874    /// Returns references to all entities in the graph.
875    pub fn all(&self) -> Vec<&HDict> {
876        self.entities.values().collect()
877    }
878
879    // ── Internal indexing ──
880
881    /// Add tag bitmap entries for an entity.
882    fn index_tags(&mut self, entity_id: usize, entity: &HDict) {
883        let tags: Vec<String> = entity.tag_names().map(|s| s.to_string()).collect();
884        self.tag_index.add(entity_id, &tags);
885
886        // Update value indexes for any indexed fields present on this entity.
887        for (name, val) in entity.iter() {
888            if self.value_index.has_index(name) {
889                self.value_index.add(entity_id, name, val);
890            }
891            // Update columnar store for tracked tags.
892            if self.columnar.is_tracked(name) {
893                self.columnar.set(entity_id, name, val);
894            }
895        }
896    }
897
898    /// Add ref adjacency entries for an entity.
899    fn index_refs(&mut self, entity_id: usize, entity: &HDict) {
900        for (name, val) in entity.iter() {
901            if let Kind::Ref(r) = val {
902                // Skip the "id" tag — it is the entity's own identity,
903                // not a reference edge.
904                if name != "id" {
905                    self.adjacency.add(entity_id, name, &r.val);
906                }
907            }
908        }
909    }
910
911    /// Remove all index entries for an entity.
912    fn remove_indexing(&mut self, entity_id: usize, entity: &HDict) {
913        let tags: Vec<String> = entity.tag_names().map(|s| s.to_string()).collect();
914        self.tag_index.remove(entity_id, &tags);
915        self.adjacency.remove(entity_id);
916
917        // Remove from value indexes.
918        for (name, val) in entity.iter() {
919            if self.value_index.has_index(name) {
920                self.value_index.remove(entity_id, name, val);
921            }
922        }
923
924        // Clear columnar data for this entity.
925        self.columnar.clear_entity(entity_id);
926    }
927
928    /// Append a diff to the changelog, capping it at [`MAX_CHANGELOG`] entries.
929    fn push_changelog(&mut self, diff: GraphDiff) {
930        self.changelog.push_back(diff);
931        while self.changelog.len() > MAX_CHANGELOG {
932            self.changelog.pop_front();
933        }
934    }
935}
936
937impl Default for EntityGraph {
938    fn default() -> Self {
939        Self::new()
940    }
941}
942
943/// Extract the ref value string from an entity's `id` tag.
944fn extract_ref_val(entity: &HDict) -> Result<String, GraphError> {
945    match entity.get("id") {
946        Some(Kind::Ref(r)) => Ok(r.val.clone()),
947        Some(_) => Err(GraphError::InvalidId),
948        None => Err(GraphError::MissingId),
949    }
950}
951
952#[cfg(test)]
953mod tests {
954    use super::*;
955    use crate::kinds::Number;
956
957    fn make_site(id: &str) -> HDict {
958        let mut d = HDict::new();
959        d.set("id", Kind::Ref(HRef::from_val(id)));
960        d.set("site", Kind::Marker);
961        d.set("dis", Kind::Str(format!("Site {id}")));
962        d.set(
963            "area",
964            Kind::Number(Number::new(4500.0, Some("ft\u{00b2}".into()))),
965        );
966        d
967    }
968
969    fn make_equip(id: &str, site_ref: &str) -> HDict {
970        let mut d = HDict::new();
971        d.set("id", Kind::Ref(HRef::from_val(id)));
972        d.set("equip", Kind::Marker);
973        d.set("dis", Kind::Str(format!("Equip {id}")));
974        d.set("siteRef", Kind::Ref(HRef::from_val(site_ref)));
975        d
976    }
977
978    fn make_point(id: &str, equip_ref: &str) -> HDict {
979        let mut d = HDict::new();
980        d.set("id", Kind::Ref(HRef::from_val(id)));
981        d.set("point", Kind::Marker);
982        d.set("sensor", Kind::Marker);
983        d.set("temp", Kind::Marker);
984        d.set("dis", Kind::Str(format!("Point {id}")));
985        d.set("equipRef", Kind::Ref(HRef::from_val(equip_ref)));
986        d.set(
987            "curVal",
988            Kind::Number(Number::new(72.5, Some("\u{00b0}F".into()))),
989        );
990        d
991    }
992
993    // ── Add tests ──
994
995    #[test]
996    fn add_entity_with_valid_id() {
997        let mut g = EntityGraph::new();
998        let result = g.add(make_site("site-1"));
999        assert!(result.is_ok());
1000        assert_eq!(result.unwrap(), "site-1");
1001        assert_eq!(g.len(), 1);
1002    }
1003
1004    #[test]
1005    fn add_entity_missing_id_fails() {
1006        let mut g = EntityGraph::new();
1007        let entity = HDict::new();
1008        let err = g.add(entity).unwrap_err();
1009        assert!(matches!(err, GraphError::MissingId));
1010    }
1011
1012    #[test]
1013    fn add_entity_non_ref_id_fails() {
1014        let mut g = EntityGraph::new();
1015        let mut entity = HDict::new();
1016        entity.set("id", Kind::Str("not-a-ref".into()));
1017        let err = g.add(entity).unwrap_err();
1018        assert!(matches!(err, GraphError::InvalidId));
1019    }
1020
1021    #[test]
1022    fn add_duplicate_ref_fails() {
1023        let mut g = EntityGraph::new();
1024        g.add(make_site("site-1")).unwrap();
1025        let err = g.add(make_site("site-1")).unwrap_err();
1026        assert!(matches!(err, GraphError::DuplicateRef(_)));
1027    }
1028
1029    // ── Get tests ──
1030
1031    #[test]
1032    fn get_existing_entity() {
1033        let mut g = EntityGraph::new();
1034        g.add(make_site("site-1")).unwrap();
1035        let entity = g.get("site-1").unwrap();
1036        assert!(entity.has("site"));
1037        assert_eq!(entity.get("dis"), Some(&Kind::Str("Site site-1".into())));
1038    }
1039
1040    #[test]
1041    fn get_missing_entity_returns_none() {
1042        let g = EntityGraph::new();
1043        assert!(g.get("nonexistent").is_none());
1044    }
1045
1046    // ── Update tests ──
1047
1048    #[test]
1049    fn update_merges_changes() {
1050        let mut g = EntityGraph::new();
1051        g.add(make_site("site-1")).unwrap();
1052
1053        let mut changes = HDict::new();
1054        changes.set("dis", Kind::Str("Updated Site".into()));
1055        changes.set("geoCity", Kind::Str("Richmond".into()));
1056        g.update("site-1", changes).unwrap();
1057
1058        let entity = g.get("site-1").unwrap();
1059        assert_eq!(entity.get("dis"), Some(&Kind::Str("Updated Site".into())));
1060        assert_eq!(entity.get("geoCity"), Some(&Kind::Str("Richmond".into())));
1061        assert!(entity.has("site")); // unchanged
1062    }
1063
1064    #[test]
1065    fn update_missing_entity_fails() {
1066        let mut g = EntityGraph::new();
1067        let err = g.update("nonexistent", HDict::new()).unwrap_err();
1068        assert!(matches!(err, GraphError::NotFound(_)));
1069    }
1070
1071    // ── Remove tests ──
1072
1073    #[test]
1074    fn remove_entity() {
1075        let mut g = EntityGraph::new();
1076        g.add(make_site("site-1")).unwrap();
1077        let removed = g.remove("site-1").unwrap();
1078        assert!(removed.has("site"));
1079        assert!(g.get("site-1").is_none());
1080        assert_eq!(g.len(), 0);
1081    }
1082
1083    #[test]
1084    fn remove_missing_entity_fails() {
1085        let mut g = EntityGraph::new();
1086        let err = g.remove("nonexistent").unwrap_err();
1087        assert!(matches!(err, GraphError::NotFound(_)));
1088    }
1089
1090    // ── Version / changelog tests ──
1091
1092    #[test]
1093    fn version_increments_on_mutations() {
1094        let mut g = EntityGraph::new();
1095        assert_eq!(g.version(), 0);
1096
1097        g.add(make_site("site-1")).unwrap();
1098        assert_eq!(g.version(), 1);
1099
1100        g.update("site-1", HDict::new()).unwrap();
1101        assert_eq!(g.version(), 2);
1102
1103        g.remove("site-1").unwrap();
1104        assert_eq!(g.version(), 3);
1105    }
1106
1107    #[test]
1108    fn changelog_records_add_update_remove() {
1109        let mut g = EntityGraph::new();
1110        g.add(make_site("site-1")).unwrap();
1111        g.update("site-1", HDict::new()).unwrap();
1112        g.remove("site-1").unwrap();
1113
1114        let changes = g.changes_since(0);
1115        assert_eq!(changes.len(), 3);
1116        assert_eq!(changes[0].op, DiffOp::Add);
1117        assert_eq!(changes[0].ref_val, "site-1");
1118        assert!(changes[0].old.is_none());
1119        assert!(changes[0].new.is_some());
1120
1121        assert_eq!(changes[1].op, DiffOp::Update);
1122        assert!(changes[1].old.is_some());
1123        assert!(changes[1].new.is_some());
1124
1125        assert_eq!(changes[2].op, DiffOp::Remove);
1126        assert!(changes[2].old.is_some());
1127        assert!(changes[2].new.is_none());
1128    }
1129
1130    #[test]
1131    fn changes_since_returns_subset() {
1132        let mut g = EntityGraph::new();
1133        g.add(make_site("site-1")).unwrap(); // v1
1134        g.add(make_site("site-2")).unwrap(); // v2
1135        g.add(make_site("site-3")).unwrap(); // v3
1136
1137        let since_v2 = g.changes_since(2);
1138        assert_eq!(since_v2.len(), 1);
1139        assert_eq!(since_v2[0].ref_val, "site-3");
1140    }
1141
1142    // ── Container tests ──
1143
1144    #[test]
1145    fn contains_check() {
1146        let mut g = EntityGraph::new();
1147        g.add(make_site("site-1")).unwrap();
1148        assert!(g.contains("site-1"));
1149        assert!(!g.contains("site-2"));
1150    }
1151
1152    #[test]
1153    fn len_and_is_empty() {
1154        let mut g = EntityGraph::new();
1155        assert!(g.is_empty());
1156        assert_eq!(g.len(), 0);
1157
1158        g.add(make_site("site-1")).unwrap();
1159        assert!(!g.is_empty());
1160        assert_eq!(g.len(), 1);
1161    }
1162
1163    // ── Query tests ──
1164
1165    #[test]
1166    fn read_with_simple_has_filter() {
1167        let mut g = EntityGraph::new();
1168        g.add(make_site("site-1")).unwrap();
1169        g.add(make_equip("equip-1", "site-1")).unwrap();
1170
1171        let results = g.read_all("site", 0).unwrap();
1172        assert_eq!(results.len(), 1);
1173        assert!(results[0].has("site"));
1174    }
1175
1176    #[test]
1177    fn read_with_comparison_filter() {
1178        let mut g = EntityGraph::new();
1179        g.add(make_point("pt-1", "equip-1")).unwrap();
1180
1181        let results = g.read_all("curVal > 70\u{00b0}F", 0).unwrap();
1182        assert_eq!(results.len(), 1);
1183    }
1184
1185    #[test]
1186    fn read_with_and_filter() {
1187        let mut g = EntityGraph::new();
1188        g.add(make_point("pt-1", "equip-1")).unwrap();
1189        g.add(make_equip("equip-1", "site-1")).unwrap();
1190
1191        let results = g.read_all("point and sensor", 0).unwrap();
1192        assert_eq!(results.len(), 1);
1193    }
1194
1195    #[test]
1196    fn read_with_or_filter() {
1197        let mut g = EntityGraph::new();
1198        g.add(make_site("site-1")).unwrap();
1199        g.add(make_equip("equip-1", "site-1")).unwrap();
1200
1201        let results = g.read_all("site or equip", 0).unwrap();
1202        assert_eq!(results.len(), 2);
1203    }
1204
1205    #[test]
1206    fn read_limit_parameter_works() {
1207        let mut g = EntityGraph::new();
1208        g.add(make_site("site-1")).unwrap();
1209        g.add(make_site("site-2")).unwrap();
1210        g.add(make_site("site-3")).unwrap();
1211
1212        let results = g.read_all("site", 2).unwrap();
1213        assert_eq!(results.len(), 2);
1214    }
1215
1216    #[test]
1217    fn read_returns_grid() {
1218        let mut g = EntityGraph::new();
1219        g.add(make_site("site-1")).unwrap();
1220        g.add(make_site("site-2")).unwrap();
1221
1222        let grid = g.read("site", 0).unwrap();
1223        assert_eq!(grid.len(), 2);
1224        assert!(grid.col("site").is_some());
1225        assert!(grid.col("id").is_some());
1226    }
1227
1228    #[test]
1229    fn read_invalid_filter() {
1230        let g = EntityGraph::new();
1231        let err = g.read("!!!", 0).unwrap_err();
1232        assert!(matches!(err, GraphError::Filter(_)));
1233    }
1234
1235    #[test]
1236    fn query_cache_returns_same_results() {
1237        let mut g = EntityGraph::new();
1238        g.add(make_site("site-1")).unwrap();
1239        g.add(make_equip("equip-1", "site-1")).unwrap();
1240        g.add(make_point("pt-1", "equip-1")).unwrap();
1241
1242        // First call populates cache
1243        let results1 = g.read_all("site", 0).unwrap();
1244        assert_eq!(results1.len(), 1);
1245
1246        // Second call should hit cache and return same results
1247        let results2 = g.read_all("site", 0).unwrap();
1248        assert_eq!(results2.len(), 1);
1249        assert_eq!(results1[0].get("id"), results2[0].get("id"));
1250    }
1251
1252    #[test]
1253    fn query_cache_invalidated_by_mutation() {
1254        let mut g = EntityGraph::new();
1255        g.add(make_site("site-1")).unwrap();
1256
1257        let results = g.read_all("site", 0).unwrap();
1258        assert_eq!(results.len(), 1);
1259
1260        // Add another site — cache should be invalidated by version bump
1261        g.add(make_site("site-2")).unwrap();
1262
1263        let results = g.read_all("site", 0).unwrap();
1264        assert_eq!(results.len(), 2);
1265    }
1266
1267    // ── Ref traversal tests ──
1268
1269    #[test]
1270    fn refs_from_returns_targets() {
1271        let mut g = EntityGraph::new();
1272        g.add(make_site("site-1")).unwrap();
1273        g.add(make_equip("equip-1", "site-1")).unwrap();
1274
1275        let targets = g.refs_from("equip-1", None);
1276        assert_eq!(targets, vec!["site-1".to_string()]);
1277    }
1278
1279    #[test]
1280    fn refs_to_returns_sources() {
1281        let mut g = EntityGraph::new();
1282        g.add(make_site("site-1")).unwrap();
1283        g.add(make_equip("equip-1", "site-1")).unwrap();
1284        g.add(make_equip("equip-2", "site-1")).unwrap();
1285
1286        let mut sources = g.refs_to("site-1", None);
1287        sources.sort();
1288        assert_eq!(sources.len(), 2);
1289    }
1290
1291    #[test]
1292    fn type_filtered_ref_queries() {
1293        let mut g = EntityGraph::new();
1294        g.add(make_site("site-1")).unwrap();
1295        g.add(make_equip("equip-1", "site-1")).unwrap();
1296
1297        let targets = g.refs_from("equip-1", Some("siteRef"));
1298        assert_eq!(targets, vec!["site-1".to_string()]);
1299
1300        let targets = g.refs_from("equip-1", Some("equipRef"));
1301        assert!(targets.is_empty());
1302    }
1303
1304    #[test]
1305    fn refs_from_nonexistent_entity() {
1306        let g = EntityGraph::new();
1307        assert!(g.refs_from("nonexistent", None).is_empty());
1308    }
1309
1310    #[test]
1311    fn refs_to_nonexistent_entity() {
1312        let g = EntityGraph::new();
1313        assert!(g.refs_to("nonexistent", None).is_empty());
1314    }
1315
1316    // ── Serialization tests ──
1317
1318    #[test]
1319    fn from_grid_round_trip() {
1320        let mut g = EntityGraph::new();
1321        g.add(make_site("site-1")).unwrap();
1322        g.add(make_equip("equip-1", "site-1")).unwrap();
1323
1324        let grid = g.to_grid("site or equip").unwrap();
1325        assert_eq!(grid.len(), 2);
1326
1327        let g2 = EntityGraph::from_grid(&grid, None).unwrap();
1328        assert_eq!(g2.len(), 2);
1329        assert!(g2.contains("site-1"));
1330        assert!(g2.contains("equip-1"));
1331    }
1332
1333    #[test]
1334    fn to_grid_empty_result() {
1335        let g = EntityGraph::new();
1336        let grid = g.to_grid("site").unwrap();
1337        assert!(grid.is_empty());
1338    }
1339
1340    // ── Update re-indexes correctly ──
1341
1342    #[test]
1343    fn update_reindexes_tags() {
1344        let mut g = EntityGraph::new();
1345        g.add(make_site("site-1")).unwrap();
1346
1347        // Should find the site with "site" filter.
1348        assert_eq!(g.read_all("site", 0).unwrap().len(), 1);
1349
1350        // Remove the "site" marker via update.
1351        let mut changes = HDict::new();
1352        changes.set("site", Kind::Remove);
1353        g.update("site-1", changes).unwrap();
1354
1355        // Should no longer match "site" filter.
1356        assert_eq!(g.read_all("site", 0).unwrap().len(), 0);
1357    }
1358
1359    #[test]
1360    fn update_reindexes_refs() {
1361        let mut g = EntityGraph::new();
1362        g.add(make_site("site-1")).unwrap();
1363        g.add(make_site("site-2")).unwrap();
1364        g.add(make_equip("equip-1", "site-1")).unwrap();
1365
1366        // Initially equip-1 points to site-1.
1367        assert_eq!(g.refs_from("equip-1", None), vec!["site-1".to_string()]);
1368
1369        // Move equip-1 to site-2.
1370        let mut changes = HDict::new();
1371        changes.set("siteRef", Kind::Ref(HRef::from_val("site-2")));
1372        g.update("equip-1", changes).unwrap();
1373
1374        assert_eq!(g.refs_from("equip-1", None), vec!["site-2".to_string()]);
1375        assert!(g.refs_to("site-1", None).is_empty());
1376    }
1377
1378    // ── Dangling ref validation ──
1379
1380    #[test]
1381    fn validate_detects_dangling_refs() {
1382        let mut g = EntityGraph::new();
1383        g.add(make_site("site-1")).unwrap();
1384        // equip-1 has siteRef pointing to "site-1" (exists) — no issue
1385        g.add(make_equip("equip-1", "site-1")).unwrap();
1386        // equip-2 has siteRef pointing to "site-999" (does not exist) — dangling
1387        g.add(make_equip("equip-2", "site-999")).unwrap();
1388
1389        let issues = g.validate();
1390        assert!(!issues.is_empty());
1391
1392        let dangling: Vec<_> = issues
1393            .iter()
1394            .filter(|i| i.issue_type == "dangling_ref")
1395            .collect();
1396        assert_eq!(dangling.len(), 1);
1397        assert_eq!(dangling[0].entity.as_deref(), Some("equip-2"));
1398        assert!(dangling[0].detail.contains("site-999"));
1399        assert!(dangling[0].detail.contains("siteRef"));
1400    }
1401
1402    // ── Empty filter exports all ──
1403
1404    #[test]
1405    fn to_grid_empty_filter_exports_all() {
1406        let mut g = EntityGraph::new();
1407        g.add(make_site("site-1")).unwrap();
1408        g.add(make_equip("equip-1", "site-1")).unwrap();
1409        g.add(make_point("pt-1", "equip-1")).unwrap();
1410
1411        let grid = g.to_grid("").unwrap();
1412        assert_eq!(grid.len(), 3);
1413        assert!(grid.col("id").is_some());
1414    }
1415
1416    // ── from_grid skips rows without id ──
1417
1418    #[test]
1419    fn changelog_bounded_to_max_size() {
1420        let mut graph = EntityGraph::new();
1421        // Add more entities than MAX_CHANGELOG
1422        for i in 0..12_000 {
1423            let mut d = HDict::new();
1424            d.set("id", Kind::Ref(HRef::from_val(format!("e{i}"))));
1425            d.set("dis", Kind::Str(format!("Entity {i}")));
1426            graph.add(d).unwrap();
1427        }
1428        // Changelog should be capped
1429        assert!(graph.changes_since(0).len() <= 10_000);
1430        // Latest changes should still be present
1431        assert!(graph.changes_since(11_999).len() <= 1);
1432    }
1433
1434    #[test]
1435    fn from_grid_skips_rows_without_id() {
1436        let cols = vec![HCol::new("id"), HCol::new("dis"), HCol::new("site")];
1437
1438        let mut row_with_id = HDict::new();
1439        row_with_id.set("id", Kind::Ref(HRef::from_val("site-1")));
1440        row_with_id.set("site", Kind::Marker);
1441        row_with_id.set("dis", Kind::Str("Has ID".into()));
1442
1443        // Row with string id (not a Ref) — should be skipped.
1444        let mut row_bad_id = HDict::new();
1445        row_bad_id.set("id", Kind::Str("not-a-ref".into()));
1446        row_bad_id.set("dis", Kind::Str("Bad ID".into()));
1447
1448        // Row with no id at all — should be skipped.
1449        let mut row_no_id = HDict::new();
1450        row_no_id.set("dis", Kind::Str("No ID".into()));
1451
1452        let grid = HGrid::from_parts(HDict::new(), cols, vec![row_with_id, row_bad_id, row_no_id]);
1453        let g = EntityGraph::from_grid(&grid, None).unwrap();
1454
1455        assert_eq!(g.len(), 1);
1456        assert!(g.contains("site-1"));
1457    }
1458
1459    // ── Graph traversal method tests ──
1460
1461    fn build_hierarchy_graph() -> EntityGraph {
1462        let mut g = EntityGraph::new();
1463        g.add(make_site("s1")).unwrap();
1464        g.add(make_site("s2")).unwrap();
1465        g.add(make_equip("e1", "s1")).unwrap();
1466        g.add(make_equip("e2", "s1")).unwrap();
1467        g.add(make_equip("e3", "s2")).unwrap();
1468        g.add(make_point("p1", "e1")).unwrap();
1469        g.add(make_point("p2", "e1")).unwrap();
1470        g.add(make_point("p3", "e2")).unwrap();
1471        g
1472    }
1473
1474    #[test]
1475    fn all_edges_returns_all_ref_relationships() {
1476        let g = build_hierarchy_graph();
1477        let edges = g.all_edges();
1478        // e1->s1 (siteRef), e2->s1, e3->s2, p1->e1 (equipRef), p2->e1, p3->e2
1479        assert_eq!(edges.len(), 6);
1480        assert!(
1481            edges
1482                .iter()
1483                .any(|(s, t, d)| s == "e1" && t == "siteRef" && d == "s1")
1484        );
1485        assert!(
1486            edges
1487                .iter()
1488                .any(|(s, t, d)| s == "p1" && t == "equipRef" && d == "e1")
1489        );
1490    }
1491
1492    #[test]
1493    fn all_edges_empty_graph() {
1494        let g = EntityGraph::new();
1495        assert!(g.all_edges().is_empty());
1496    }
1497
1498    #[test]
1499    fn neighbors_one_hop() {
1500        let g = build_hierarchy_graph();
1501        let (entities, edges) = g.neighbors("e1", 1, None);
1502        // e1 + s1 (forward via siteRef) + p1, p2 (reverse from equipRef)
1503        let ids: Vec<String> = entities
1504            .iter()
1505            .filter_map(|e| e.id().map(|r| r.val.clone()))
1506            .collect();
1507        assert!(ids.contains(&"e1".to_string()));
1508        assert!(ids.contains(&"s1".to_string()));
1509        assert!(ids.contains(&"p1".to_string()));
1510        assert!(ids.contains(&"p2".to_string()));
1511        assert!(!edges.is_empty());
1512    }
1513
1514    #[test]
1515    fn neighbors_with_ref_type_filter() {
1516        let g = build_hierarchy_graph();
1517        let (entities, edges) = g.neighbors("e1", 1, Some(&["siteRef"]));
1518        // Only forward siteRef edge: e1->s1
1519        let ids: Vec<String> = entities
1520            .iter()
1521            .filter_map(|e| e.id().map(|r| r.val.clone()))
1522            .collect();
1523        assert!(ids.contains(&"e1".to_string()));
1524        assert!(ids.contains(&"s1".to_string()));
1525        // Should not include p1/p2 (those connect via equipRef)
1526        assert!(!ids.contains(&"p1".to_string()));
1527        // Edges should only contain siteRef
1528        assert!(edges.iter().all(|(_, tag, _)| tag == "siteRef"));
1529    }
1530
1531    #[test]
1532    fn neighbors_zero_hops() {
1533        let g = build_hierarchy_graph();
1534        let (entities, edges) = g.neighbors("e1", 0, None);
1535        assert_eq!(entities.len(), 1);
1536        assert!(edges.is_empty());
1537    }
1538
1539    #[test]
1540    fn neighbors_nonexistent_entity() {
1541        let g = build_hierarchy_graph();
1542        let (entities, _) = g.neighbors("nonexistent", 1, None);
1543        assert!(entities.is_empty());
1544    }
1545
1546    #[test]
1547    fn shortest_path_direct() {
1548        let g = build_hierarchy_graph();
1549        let path = g.shortest_path("e1", "s1");
1550        assert_eq!(path, vec!["e1".to_string(), "s1".to_string()]);
1551    }
1552
1553    #[test]
1554    fn shortest_path_two_hops() {
1555        let g = build_hierarchy_graph();
1556        let path = g.shortest_path("p1", "s1");
1557        // p1 -> e1 -> s1
1558        assert_eq!(path.len(), 3);
1559        assert_eq!(path[0], "p1");
1560        assert_eq!(path[2], "s1");
1561    }
1562
1563    #[test]
1564    fn shortest_path_same_node() {
1565        let g = build_hierarchy_graph();
1566        let path = g.shortest_path("s1", "s1");
1567        assert_eq!(path, vec!["s1".to_string()]);
1568    }
1569
1570    #[test]
1571    fn shortest_path_no_connection() {
1572        // s1 and s2 are not connected to each other directly
1573        let g = build_hierarchy_graph();
1574        let path = g.shortest_path("s1", "s2");
1575        // They are disconnected (no edges between them)
1576        assert!(path.is_empty());
1577    }
1578
1579    #[test]
1580    fn shortest_path_nonexistent() {
1581        let g = build_hierarchy_graph();
1582        let path = g.shortest_path("s1", "nonexistent");
1583        assert!(path.is_empty());
1584    }
1585
1586    #[test]
1587    fn subtree_from_site() {
1588        let g = build_hierarchy_graph();
1589        let tree = g.subtree("s1", 10);
1590        // s1 (depth 0), e1 (1), e2 (1), p1 (2), p2 (2), p3 (2)
1591        assert_eq!(tree.len(), 6);
1592        // Root is at depth 0
1593        assert_eq!(tree[0].0.id().unwrap().val, "s1");
1594        assert_eq!(tree[0].1, 0);
1595        // Equips at depth 1
1596        let depth_1: Vec<_> = tree.iter().filter(|(_, d)| *d == 1).collect();
1597        assert_eq!(depth_1.len(), 2);
1598        // Points at depth 2
1599        let depth_2: Vec<_> = tree.iter().filter(|(_, d)| *d == 2).collect();
1600        assert_eq!(depth_2.len(), 3);
1601    }
1602
1603    #[test]
1604    fn subtree_max_depth_1() {
1605        let g = build_hierarchy_graph();
1606        let tree = g.subtree("s1", 1);
1607        // s1 (depth 0), e1 (1), e2 (1) — no points (depth 2)
1608        assert_eq!(tree.len(), 3);
1609    }
1610
1611    #[test]
1612    fn subtree_nonexistent_root() {
1613        let g = build_hierarchy_graph();
1614        let tree = g.subtree("nonexistent", 10);
1615        assert!(tree.is_empty());
1616    }
1617
1618    #[test]
1619    fn subtree_leaf_node() {
1620        let g = build_hierarchy_graph();
1621        let tree = g.subtree("p1", 10);
1622        // p1 has no children referencing it
1623        assert_eq!(tree.len(), 1);
1624        assert_eq!(tree[0].0.id().unwrap().val, "p1");
1625    }
1626}