Skip to main content

haystack_core/graph/
entity_graph.rs

1// EntityGraph — in-memory entity store with bitmap indexing and ref adjacency.
2
3use std::collections::{HashMap, HashSet};
4
5use indexmap::IndexMap;
6use parking_lot::Mutex;
7use roaring::RoaringBitmap;
8
9use crate::data::{HCol, HDict, HGrid};
10use crate::filter::{CmpOp, FilterNode, matches_with_ns, parse_filter};
11use crate::kinds::{HRef, Kind};
12use crate::ontology::{DefNamespace, ValidationIssue};
13
14use super::adjacency::RefAdjacency;
15use super::bitmap::TagBitmapIndex;
16use super::changelog::{ChangelogGap, DiffOp, GraphDiff};
17use super::value_index::ValueIndex;
18
19/// Errors returned by EntityGraph operations.
20#[derive(Debug, thiserror::Error)]
21pub enum GraphError {
22    #[error("entity missing 'id' tag")]
23    MissingId,
24    #[error("entity id must be a Ref")]
25    InvalidId,
26    #[error("entity already exists: {0}")]
27    DuplicateRef(String),
28    #[error("entity not found: {0}")]
29    NotFound(String),
30    #[error("filter error: {0}")]
31    Filter(String),
32    #[error("entity ID space exhausted (max {MAX_ENTITY_ID})")]
33    IdExhausted,
34}
35
36/// Maximum entity ID — constrained by RoaringBitmap (u32) and snapshot format.
37const MAX_ENTITY_ID: usize = u32::MAX as usize;
38
39/// Core entity graph with bitmap tag indexing and bidirectional ref adjacency.
40pub struct EntityGraph {
41    /// ref_val -> entity dict
42    entities: HashMap<String, HDict>,
43    /// ref_val -> internal numeric id (for bitmap indexing)
44    id_map: HashMap<String, usize>,
45    /// internal numeric id -> ref_val
46    reverse_id: HashMap<usize, String>,
47    /// Next internal id to assign.
48    next_id: usize,
49    /// Freelist of recycled entity IDs from removed entities.
50    free_ids: Vec<usize>,
51    /// Tag bitmap index for fast has/missing queries.
52    tag_index: TagBitmapIndex,
53    /// Bidirectional ref adjacency for graph traversal.
54    adjacency: RefAdjacency,
55    /// Optional ontology namespace for spec-aware operations.
56    namespace: Option<DefNamespace>,
57    /// Monotonic version counter, incremented on every mutation.
58    version: u64,
59    /// Ordered list of mutations.
60    changelog: std::collections::VecDeque<GraphDiff>,
61    /// Maximum number of changelog entries retained.
62    changelog_capacity: usize,
63    /// Lowest version still present in the changelog (0 = no evictions yet).
64    floor_version: u64,
65    /// LRU query cache: (filter, version) → matching ref_vals.
66    /// Uses Mutex for interior mutability since read_all takes &self.
67    query_cache: Mutex<QueryCache>,
68    /// Parsed filter AST cache: filter_string → AST (version-independent).
69    ast_cache: Mutex<HashMap<String, FilterNode>>,
70    /// Optional B-Tree value indexes for comparison-based filter acceleration.
71    value_index: ValueIndex,
72}
73
74/// Fixed-capacity LRU cache for filter query results using IndexMap for O(1) ops.
75struct QueryCache {
76    /// (filter, version) → matching ref_vals. Most-recently-used at the back.
77    entries: IndexMap<(String, u64), Vec<String>>,
78    capacity: usize,
79}
80
81impl QueryCache {
82    fn new(capacity: usize) -> Self {
83        Self {
84            entries: IndexMap::with_capacity(capacity),
85            capacity,
86        }
87    }
88
89    fn get(&mut self, filter: &str, version: u64) -> Option<&[String]> {
90        // Move to back (most recently used) on access.
91        let key = (filter.to_string(), version);
92        let idx = self.entries.get_index_of(&key)?;
93        self.entries.move_index(idx, self.entries.len() - 1);
94        self.entries.get(&key).map(|v| v.as_slice())
95    }
96
97    fn insert(&mut self, filter: String, version: u64, ref_vals: Vec<String>) {
98        if self.entries.len() >= self.capacity {
99            // First try purging stale entries from older versions.
100            self.purge_stale(version);
101        }
102        if self.entries.len() >= self.capacity {
103            // Still at capacity — evict least recently used (front).
104            self.entries.shift_remove_index(0);
105        }
106        self.entries.insert((filter, version), ref_vals);
107    }
108
109    /// Remove all entries whose version is older than `min_version`.
110    fn purge_stale(&mut self, min_version: u64) {
111        self.entries
112            .retain(|(_filter, version), _| *version >= min_version);
113    }
114}
115
116/// Compute query cache capacity based on entity count.
117/// Scales with graph size but bounded: min 256, max 1024.
118fn query_cache_capacity_for(entity_count: usize) -> usize {
119    (entity_count / 100).clamp(256, 1024)
120}
121
122const DEFAULT_QUERY_CACHE_CAPACITY: usize = 256;
123
124/// Maximum number of entries in the parsed-AST cache before it is cleared.
125const MAX_AST_CACHE_SIZE: usize = 10_000;
126
127/// Common Haystack fields to auto-index for O(log N) value lookups.
128const AUTO_INDEX_FIELDS: &[&str] = &[
129    "siteRef", "equipRef", "dis", "curVal", "area", "geoCity", "kind", "unit",
130];
131
132impl EntityGraph {
133    /// Create an empty entity graph with standard Haystack fields auto-indexed
134    /// and default changelog capacity (50,000).
135    pub fn new() -> Self {
136        Self::with_changelog_capacity(super::changelog::DEFAULT_CHANGELOG_CAPACITY)
137    }
138
139    /// Create an empty entity graph with a custom changelog capacity.
140    pub fn with_changelog_capacity(capacity: usize) -> Self {
141        let capacity = capacity.max(1); // Ensure at least 1 entry
142        let mut value_index = ValueIndex::new();
143        for field in AUTO_INDEX_FIELDS {
144            value_index.index_field(field);
145        }
146        Self {
147            entities: HashMap::new(),
148            id_map: HashMap::new(),
149            reverse_id: HashMap::new(),
150            next_id: 0,
151            free_ids: Vec::new(),
152            tag_index: TagBitmapIndex::new(),
153            adjacency: RefAdjacency::new(),
154            namespace: None,
155            version: 0,
156            changelog: std::collections::VecDeque::new(),
157            changelog_capacity: capacity,
158            floor_version: 0,
159            query_cache: Mutex::new(QueryCache::new(DEFAULT_QUERY_CACHE_CAPACITY)),
160            ast_cache: Mutex::new(HashMap::new()),
161            value_index,
162        }
163    }
164
165    /// Create an entity graph with an ontology namespace.
166    pub fn with_namespace(ns: DefNamespace) -> Self {
167        Self {
168            namespace: Some(ns),
169            ..Self::new()
170        }
171    }
172
173    // ── Value Indexes ──
174
175    /// Register a field for B-Tree value indexing. Enables O(log N) range
176    /// queries (e.g. `temp > 72`) for this field. Must be called before
177    /// entities are added, or followed by `rebuild_value_index` for existing data.
178    pub fn index_field(&mut self, field: &str) {
179        self.value_index.index_field(field);
180    }
181
182    /// Rebuild the value index for all indexed fields from the current entities.
183    pub fn rebuild_value_index(&mut self) {
184        self.value_index.clear();
185        for (ref_val, entity) in &self.entities {
186            if let Some(&eid) = self.id_map.get(ref_val.as_str()) {
187                for (name, val) in entity.iter() {
188                    if self.value_index.has_index(name) {
189                        self.value_index.add(eid, name, val);
190                    }
191                }
192            }
193        }
194    }
195
196    /// Returns a reference to the value index.
197    pub fn value_index(&self) -> &ValueIndex {
198        &self.value_index
199    }
200
201    // ── CRUD ──
202
203    /// Add an entity to the graph.
204    ///
205    /// The entity must have an `id` tag that is a `Ref`. Returns the ref
206    /// value string on success.
207    pub fn add(&mut self, entity: HDict) -> Result<String, GraphError> {
208        let ref_val = extract_ref_val(&entity)?;
209
210        if self.entities.contains_key(&ref_val) {
211            return Err(GraphError::DuplicateRef(ref_val));
212        }
213
214        let eid = if let Some(recycled) = self.free_ids.pop() {
215            recycled
216        } else {
217            if self.next_id > MAX_ENTITY_ID {
218                return Err(GraphError::IdExhausted);
219            }
220            let id = self.next_id;
221            self.next_id = self.next_id.checked_add(1).ok_or(GraphError::InvalidId)?;
222            id
223        };
224
225        self.id_map.insert(ref_val.clone(), eid);
226        self.reverse_id.insert(eid, ref_val.clone());
227
228        // Index before inserting (borrows entity immutably, self mutably).
229        self.index_tags(eid, &entity);
230        self.index_refs(eid, &entity);
231
232        // Clone for the changelog, then move the entity into the map.
233        let entity_for_log = entity.clone();
234        self.entities.insert(ref_val.clone(), entity);
235
236        self.version += 1;
237        self.push_changelog(GraphDiff {
238            version: self.version,
239            timestamp: 0,
240            op: DiffOp::Add,
241            ref_val: ref_val.clone(),
242            old: None,
243            new: Some(entity_for_log),
244            changed_tags: None,
245            previous_tags: None,
246        });
247
248        // Resize query cache if entity count crossed a threshold.
249        let target_cap = query_cache_capacity_for(self.entities.len());
250        let mut cache = self.query_cache.lock();
251        if cache.capacity < target_cap {
252            cache.capacity = target_cap;
253        }
254
255        Ok(ref_val)
256    }
257
258    /// Get a reference to an entity by ref value.
259    pub fn get(&self, ref_val: &str) -> Option<&HDict> {
260        self.entities.get(ref_val)
261    }
262
263    /// Update an existing entity by merging `changes` into it.
264    ///
265    /// Tags in `changes` overwrite existing tags; `Kind::Remove` tags are
266    /// deleted. The `id` tag cannot be changed.
267    pub fn update(&mut self, ref_val: &str, changes: HDict) -> Result<(), GraphError> {
268        if changes.is_empty() {
269            return Ok(());
270        }
271
272        let eid = *self
273            .id_map
274            .get(ref_val)
275            .ok_or_else(|| GraphError::NotFound(ref_val.to_string()))?;
276
277        let mut old_entity = self
278            .entities
279            .remove(ref_val)
280            .ok_or_else(|| GraphError::NotFound(ref_val.to_string()))?;
281
282        // Compute delta for changelog before mutating.
283        let mut prev_tags = HDict::new();
284        let mut changed = HDict::new();
285        for (key, new_val) in changes.iter() {
286            if let Some(old_val) = old_entity.get(key) {
287                prev_tags.set(key, old_val.clone());
288            }
289            changed.set(key, new_val.clone());
290        }
291
292        // Clone old for delta comparison, then merge.
293        let old_snapshot = old_entity.clone();
294        old_entity.merge(&changes);
295
296        // Delta indexing: only update what changed.
297        self.update_tags_delta(eid, &old_snapshot, &old_entity);
298
299        // Re-index refs only if ref edges changed.
300        if Self::refs_changed(&old_snapshot, &old_entity) {
301            self.adjacency.remove(eid);
302            self.index_refs(eid, &old_entity);
303        }
304
305        self.entities.insert(ref_val.to_string(), old_entity);
306
307        self.version += 1;
308        self.push_changelog(GraphDiff {
309            version: self.version,
310            timestamp: 0,
311            op: DiffOp::Update,
312            ref_val: ref_val.to_string(),
313            old: None,
314            new: None,
315            changed_tags: Some(changed),
316            previous_tags: Some(prev_tags),
317        });
318
319        Ok(())
320    }
321
322    /// Remove an entity from the graph. Returns the removed entity.
323    pub fn remove(&mut self, ref_val: &str) -> Result<HDict, GraphError> {
324        let eid = self
325            .id_map
326            .remove(ref_val)
327            .ok_or_else(|| GraphError::NotFound(ref_val.to_string()))?;
328
329        self.reverse_id.remove(&eid);
330
331        let entity = self
332            .entities
333            .remove(ref_val)
334            .ok_or_else(|| GraphError::NotFound(ref_val.to_string()))?;
335
336        self.remove_indexing(eid, &entity);
337        self.free_ids.push(eid);
338
339        self.version += 1;
340        self.push_changelog(GraphDiff {
341            version: self.version,
342            timestamp: 0,
343            op: DiffOp::Remove,
344            ref_val: ref_val.to_string(),
345            old: Some(entity.clone()),
346            new: None,
347            changed_tags: None,
348            previous_tags: None,
349        });
350
351        Ok(entity)
352    }
353
354    // ── Query ──
355
356    /// Run a filter expression and return matching entities as a grid.
357    pub fn read(&self, filter_expr: &str, limit: usize) -> Result<HGrid, GraphError> {
358        let results = self.read_all(filter_expr, limit)?;
359
360        if results.is_empty() {
361            return Ok(HGrid::new());
362        }
363
364        // Collect all unique column names.
365        let mut seen: std::collections::HashSet<String> =
366            std::collections::HashSet::with_capacity(results.len().min(64));
367        for entity in &results {
368            for name in entity.tag_names() {
369                seen.insert(name.to_string());
370            }
371        }
372        let mut col_set: Vec<String> = seen.into_iter().collect();
373        col_set.sort();
374        let cols: Vec<HCol> = col_set.iter().map(|n| HCol::new(n.as_str())).collect();
375        let rows: Vec<HDict> = results.into_iter().cloned().collect();
376
377        Ok(HGrid::from_parts(HDict::new(), cols, rows))
378    }
379
380    /// Run a filter expression and return matching entities as references.
381    pub fn read_all(&self, filter_expr: &str, limit: usize) -> Result<Vec<&HDict>, GraphError> {
382        let effective_limit = if limit == 0 { usize::MAX } else { limit };
383
384        // Check query cache (version-keyed, so mutations auto-invalidate).
385        {
386            let mut cache = self.query_cache.lock();
387            if let Some(cached_refs) = cache.get(filter_expr, self.version) {
388                let mut results = Vec::new();
389                for rv in cached_refs {
390                    if results.len() >= effective_limit {
391                        break;
392                    }
393                    if let Some(entity) = self.entities.get(rv) {
394                        results.push(entity);
395                    }
396                }
397                return Ok(results);
398            }
399        }
400
401        // Use cached AST or parse and cache it (ASTs are version-independent).
402        let ast = {
403            let mut ast_cache = self.ast_cache.lock();
404            if let Some(cached) = ast_cache.get(filter_expr) {
405                cached.clone()
406            } else {
407                let parsed =
408                    parse_filter(filter_expr).map_err(|e| GraphError::Filter(e.to_string()))?;
409                if ast_cache.len() >= MAX_AST_CACHE_SIZE {
410                    ast_cache.clear();
411                }
412                ast_cache.insert(filter_expr.to_string(), parsed.clone());
413                parsed
414            }
415        };
416
417        // Phase 1: bitmap acceleration (tag + value index).
418        let universe = RoaringBitmap::from_iter(self.id_map.values().map(|&id| id as u32));
419        let candidates = bitmap_candidates(&ast, &self.tag_index, &self.value_index, &universe);
420
421        // Phase 2: full filter evaluation.
422        let resolver = |r: &HRef| -> Option<&HDict> { self.entities.get(&r.val) };
423        let ns = self.namespace.as_ref();
424
425        let mut results: Vec<&HDict>;
426
427        if let Some(ref bitmap) = candidates {
428            results = Vec::new();
429            for eid in TagBitmapIndex::iter_set_bits(bitmap) {
430                if results.len() >= effective_limit {
431                    break;
432                }
433                if let Some(ref_val) = self.reverse_id.get(&eid)
434                    && let Some(entity) = self.entities.get(ref_val)
435                    && matches_with_ns(&ast, entity, Some(&resolver), ns)
436                {
437                    results.push(entity);
438                }
439            }
440        } else {
441            results = Vec::new();
442            for entity in self.entities.values() {
443                if results.len() >= effective_limit {
444                    break;
445                }
446                if matches_with_ns(&ast, entity, Some(&resolver), ns) {
447                    results.push(entity);
448                }
449            }
450        }
451
452        if results.len() > effective_limit {
453            results.truncate(effective_limit);
454        }
455
456        // Populate cache with result ref_vals (only for unlimited queries to
457        // avoid caching partial results that depend on limit).
458        if limit == 0 {
459            let ref_vals: Vec<String> = results
460                .iter()
461                .filter_map(|e| {
462                    e.get("id").and_then(|k| match k {
463                        Kind::Ref(r) => Some(r.val.clone()),
464                        _ => None,
465                    })
466                })
467                .collect();
468            let mut cache = self.query_cache.lock();
469            cache.insert(filter_expr.to_string(), self.version, ref_vals);
470        }
471
472        Ok(results)
473    }
474
475    // ── Ref traversal ──
476
477    /// Get ref values that the given entity points to.
478    pub fn refs_from(&self, ref_val: &str, ref_type: Option<&str>) -> Vec<String> {
479        match self.id_map.get(ref_val) {
480            Some(&eid) => self.adjacency.targets_from(eid, ref_type),
481            None => Vec::new(),
482        }
483    }
484
485    /// Get ref values of entities that point to the given entity.
486    pub fn refs_to(&self, ref_val: &str, ref_type: Option<&str>) -> Vec<String> {
487        self.adjacency
488            .sources_to(ref_val, ref_type)
489            .iter()
490            .filter_map(|eid| self.reverse_id.get(eid).cloned())
491            .collect()
492    }
493
494    // ── Haystack Hierarchy Helpers ──
495
496    /// Walk a chain of ref tags starting from an entity.
497    ///
498    /// For example, `ref_chain("point-1", &["equipRef", "siteRef"])` follows
499    /// `point-1` → its `equipRef` → that entity's `siteRef`, returning the
500    /// ordered path of resolved entities (excluding the starting entity).
501    pub fn ref_chain(&self, ref_val: &str, ref_tags: &[&str]) -> Vec<&HDict> {
502        let mut result = Vec::with_capacity(ref_tags.len());
503        let mut current = ref_val.to_string();
504        for tag in ref_tags {
505            let entity = match self.entities.get(&current) {
506                Some(e) => e,
507                None => break,
508            };
509            match entity.get(tag) {
510                Some(Kind::Ref(r)) => {
511                    current = r.val.clone();
512                    if let Some(target) = self.entities.get(&current) {
513                        result.push(target);
514                    } else {
515                        break;
516                    }
517                }
518                _ => break,
519            }
520        }
521        result
522    }
523
524    /// Resolve the site for any entity by walking `equipRef` → `siteRef`.
525    ///
526    /// If the entity itself has a `site` marker, returns it directly.
527    /// Otherwise walks the standard Haystack ref chain.
528    pub fn site_for(&self, ref_val: &str) -> Option<&HDict> {
529        let entity = self.entities.get(ref_val)?;
530        // If the entity is itself a site, return it.
531        if entity.has("site") {
532            return Some(entity);
533        }
534        // Check direct siteRef.
535        if let Some(Kind::Ref(r)) = entity.get("siteRef") {
536            return self.entities.get(&r.val);
537        }
538        // Walk equipRef → siteRef.
539        if let Some(Kind::Ref(r)) = entity.get("equipRef")
540            && let Some(equip) = self.entities.get(&r.val)
541            && let Some(Kind::Ref(sr)) = equip.get("siteRef")
542        {
543            return self.entities.get(&sr.val);
544        }
545        None
546    }
547
548    /// All direct children: entities with any ref tag pointing to this entity.
549    pub fn children(&self, ref_val: &str) -> Vec<&HDict> {
550        self.refs_to(ref_val, None)
551            .iter()
552            .filter_map(|r| self.entities.get(r))
553            .collect()
554    }
555
556    /// All points for an equip — children with the `point` marker.
557    ///
558    /// Optionally filter further with a filter expression.
559    pub fn equip_points(
560        &self,
561        equip_ref: &str,
562        filter: Option<&str>,
563    ) -> Result<Vec<&HDict>, GraphError> {
564        let points: Vec<&HDict> = self
565            .children(equip_ref)
566            .into_iter()
567            .filter(|e| e.has("point"))
568            .collect();
569        match filter {
570            Some(expr) => {
571                let ast = crate::filter::parse_filter(expr)
572                    .map_err(|e| GraphError::Filter(e.to_string()))?;
573                Ok(points
574                    .into_iter()
575                    .filter(|e| crate::filter::matches(&ast, e, None))
576                    .collect())
577            }
578            None => Ok(points),
579        }
580    }
581
582    // ── Spec-aware ──
583
584    /// Validate all entities against the namespace and check for dangling refs.
585    ///
586    /// Returns empty if no namespace is set and no dangling refs exist.
587    pub fn validate(&self) -> Vec<ValidationIssue> {
588        let mut issues: Vec<ValidationIssue> = match &self.namespace {
589            Some(ns) => self
590                .entities
591                .values()
592                .flat_map(|e| ns.validate_entity(e))
593                .collect(),
594            None => Vec::new(),
595        };
596
597        // Check for dangling refs: Ref values (except `id`) that point to
598        // entities not present in the graph.
599        for entity in self.entities.values() {
600            let entity_ref = entity.id().map(|r| r.val.as_str());
601            for (name, val) in entity.iter() {
602                if name == "id" {
603                    continue;
604                }
605                if let Kind::Ref(r) = val
606                    && !self.entities.contains_key(&r.val)
607                {
608                    issues.push(ValidationIssue {
609                        entity: entity_ref.map(|s| s.to_string()),
610                        issue_type: "dangling_ref".to_string(),
611                        detail: format!(
612                            "tag '{}' references '{}' which does not exist in the graph",
613                            name, r.val
614                        ),
615                    });
616                }
617            }
618        }
619
620        issues
621    }
622
623    // ── Serialization ──
624
625    /// Convert matching entities to a grid.
626    ///
627    /// If `filter_expr` is empty, exports all entities.
628    /// Otherwise, delegates to `read`.
629    pub fn to_grid(&self, filter_expr: &str) -> Result<HGrid, GraphError> {
630        if filter_expr.is_empty() {
631            let entities: Vec<&HDict> = self.entities.values().collect();
632            return Ok(Self::entities_to_grid(&entities));
633        }
634        self.read(filter_expr, 0)
635    }
636
637    /// Build a grid from a slice of entity references.
638    fn entities_to_grid(entities: &[&HDict]) -> HGrid {
639        if entities.is_empty() {
640            return HGrid::new();
641        }
642
643        let mut col_set: Vec<String> = Vec::new();
644        let mut seen = std::collections::HashSet::new();
645        for entity in entities {
646            for name in entity.tag_names() {
647                if seen.insert(name.to_string()) {
648                    col_set.push(name.to_string());
649                }
650            }
651        }
652        col_set.sort();
653        let cols: Vec<HCol> = col_set.iter().map(|n| HCol::new(n.as_str())).collect();
654        let rows: Vec<HDict> = entities.iter().map(|e| (*e).clone()).collect();
655
656        HGrid::from_parts(HDict::new(), cols, rows)
657    }
658
659    /// Build an EntityGraph from a grid.
660    ///
661    /// Rows without a valid `id` Ref tag are silently skipped.
662    pub fn from_grid(grid: &HGrid, namespace: Option<DefNamespace>) -> Result<Self, GraphError> {
663        let mut graph = match namespace {
664            Some(ns) => Self::with_namespace(ns),
665            None => Self::new(),
666        };
667        for row in &grid.rows {
668            if row.id().is_some() {
669                match graph.add(row.clone()) {
670                    Ok(_) => {}
671                    Err(GraphError::DuplicateRef(_)) => continue,
672                    Err(e) => return Err(e),
673                }
674            }
675        }
676        Ok(graph)
677    }
678
679    // ── Change tracking ──
680
681    /// Get changelog entries since a given version.
682    ///
683    /// Returns `Err(ChangelogGap)` if the requested version has been evicted
684    /// from the changelog, signalling the subscriber must do a full resync.
685    pub fn changes_since(&self, version: u64) -> Result<Vec<&GraphDiff>, ChangelogGap> {
686        let target = version + 1;
687        // If the floor has advanced past the requested version, the subscriber
688        // has fallen behind and missed entries.
689        if self.floor_version > 0 && version < self.floor_version {
690            return Err(ChangelogGap {
691                subscriber_version: version,
692                floor_version: self.floor_version,
693            });
694        }
695        // Binary search: versions are monotonically increasing in the VecDeque.
696        // partition_point finds the first entry where version >= target.
697        let start = self.changelog.partition_point(|d| d.version < target);
698        Ok(self.changelog.iter().skip(start).collect())
699    }
700
701    /// The lowest version still retained in the changelog.
702    ///
703    /// Returns 0 if no entries have been evicted.
704    pub fn floor_version(&self) -> u64 {
705        self.floor_version
706    }
707
708    /// The configured changelog capacity.
709    pub fn changelog_capacity(&self) -> usize {
710        self.changelog_capacity
711    }
712
713    /// Current query cache capacity.
714    pub fn query_cache_capacity(&self) -> usize {
715        self.query_cache.lock().capacity
716    }
717
718    /// Current graph version (monotonically increasing).
719    pub fn version(&self) -> u64 {
720        self.version
721    }
722
723    // ── Container ──
724
725    /// Number of entities in the graph.
726    pub fn len(&self) -> usize {
727        self.entities.len()
728    }
729
730    /// Returns `true` if the graph has no entities.
731    pub fn is_empty(&self) -> bool {
732        self.entities.is_empty()
733    }
734
735    /// Shrink internal collections to fit their current size, reclaiming memory
736    /// from previous bulk removals. Call this after removing many entities.
737    pub fn compact(&mut self) {
738        self.entities.shrink_to_fit();
739        self.id_map.shrink_to_fit();
740        self.reverse_id.shrink_to_fit();
741        self.free_ids.shrink_to_fit();
742    }
743
744    /// Returns `true` if an entity with the given ref value exists.
745    pub fn contains(&self, ref_val: &str) -> bool {
746        self.entities.contains_key(ref_val)
747    }
748
749    /// Returns references to all entities in the graph.
750    pub fn all(&self) -> Vec<&HDict> {
751        self.entities.values().collect()
752    }
753
754    // ── Internal indexing ──
755
756    /// Add tag bitmap entries for an entity.
757    fn index_tags(&mut self, entity_id: usize, entity: &HDict) {
758        let tags: Vec<String> = entity.tag_names().map(|s| s.to_string()).collect();
759        self.tag_index.add(entity_id, &tags);
760
761        // Update value indexes for any indexed fields present on this entity.
762        for (name, val) in entity.iter() {
763            if self.value_index.has_index(name) {
764                self.value_index.add(entity_id, name, val);
765            }
766        }
767    }
768
769    /// Add ref adjacency entries for an entity.
770    fn index_refs(&mut self, entity_id: usize, entity: &HDict) {
771        for (name, val) in entity.iter() {
772            if let Kind::Ref(r) = val {
773                // Skip the "id" tag — it is the entity's own identity,
774                // not a reference edge.
775                if name != "id" {
776                    self.adjacency.add(entity_id, name, &r.val);
777                }
778            }
779        }
780    }
781
782    /// Remove all index entries for an entity.
783    fn remove_indexing(&mut self, entity_id: usize, entity: &HDict) {
784        let tags: Vec<String> = entity.tag_names().map(|s| s.to_string()).collect();
785        self.tag_index.remove(entity_id, &tags);
786        self.adjacency.remove(entity_id);
787
788        // Remove from value indexes.
789        for (name, val) in entity.iter() {
790            if self.value_index.has_index(name) {
791                self.value_index.remove(entity_id, name, val);
792            }
793        }
794    }
795
796    /// Update only the changed tags in the tag bitmap index.
797    fn update_tags_delta(&mut self, entity_id: usize, old: &HDict, new: &HDict) {
798        let old_tags: std::collections::HashSet<&str> = old.tag_names().collect();
799        let new_tags: std::collections::HashSet<&str> = new.tag_names().collect();
800
801        // Tags removed: clear bits.
802        let removed: Vec<String> = old_tags
803            .difference(&new_tags)
804            .map(|s| s.to_string())
805            .collect();
806        if !removed.is_empty() {
807            self.tag_index.remove(entity_id, &removed);
808        }
809
810        // Tags added: set bits.
811        let added: Vec<String> = new_tags
812            .difference(&old_tags)
813            .map(|s| s.to_string())
814            .collect();
815        if !added.is_empty() {
816            self.tag_index.add(entity_id, &added);
817        }
818
819        // Update value indexes for changed fields only.
820        for (name, new_val) in new.iter() {
821            if self.value_index.has_index(name) {
822                if let Some(old_val) = old.get(name) {
823                    if old_val != new_val {
824                        self.value_index.remove(entity_id, name, old_val);
825                        self.value_index.add(entity_id, name, new_val);
826                    }
827                } else {
828                    self.value_index.add(entity_id, name, new_val);
829                }
830            }
831        }
832
833        // Remove value indexes for removed fields.
834        for name in &removed {
835            if self.value_index.has_index(name)
836                && let Some(old_val) = old.get(name.as_str())
837            {
838                self.value_index.remove(entity_id, name, old_val);
839            }
840        }
841    }
842
843    /// Check if ref edges changed between old and new entity.
844    fn refs_changed(old: &HDict, new: &HDict) -> bool {
845        for (name, val) in new.iter() {
846            if name != "id"
847                && let Kind::Ref(_) = val
848                && old.get(name) != Some(val)
849            {
850                return true;
851            }
852        }
853        // Check for removed refs.
854        for (name, val) in old.iter() {
855            if name != "id"
856                && let Kind::Ref(_) = val
857                && new.get(name).is_none()
858            {
859                return true;
860            }
861        }
862        false
863    }
864
865    /// Build a full hierarchy subtree as a structured tree.
866    /// `root` is the entity ref, `max_depth` limits recursion (0 = root only).
867    pub fn hierarchy_tree(&self, root: &str, max_depth: usize) -> Option<HierarchyNode> {
868        let entity = self.entities.get(root)?.clone();
869        let mut visited = HashSet::new();
870        visited.insert(root.to_string());
871        Some(self.build_subtree(root, &entity, 0, max_depth, &mut visited))
872    }
873
874    fn build_subtree(
875        &self,
876        ref_val: &str,
877        entity: &HDict,
878        depth: usize,
879        max_depth: usize,
880        visited: &mut HashSet<String>,
881    ) -> HierarchyNode {
882        let children = if depth < max_depth {
883            self.children(ref_val)
884                .into_iter()
885                .filter_map(|child| {
886                    let child_id = child.id()?.val.clone();
887                    if visited.contains(&child_id) {
888                        return None;
889                    }
890                    visited.insert(child_id.clone());
891                    Some(self.build_subtree(&child_id, child, depth + 1, max_depth, visited))
892                })
893                .collect()
894        } else {
895            Vec::new()
896        };
897        HierarchyNode {
898            entity: entity.clone(),
899            children,
900            depth,
901        }
902    }
903
904    /// Determine the most specific entity type from its markers.
905    ///
906    /// Returns the most specific marker tag that identifies the entity type.
907    /// E.g., an entity with `equip` + `ahu` markers returns `"ahu"` (most specific).
908    pub fn classify(&self, ref_val: &str) -> Option<String> {
909        let entity = self.entities.get(ref_val)?;
910        classify_entity(entity)
911    }
912
913    /// Append a diff to the changelog, capping at the configured capacity.
914    fn push_changelog(&mut self, mut diff: GraphDiff) {
915        diff.timestamp = GraphDiff::now_nanos();
916        self.changelog.push_back(diff);
917        while self.changelog.len() > self.changelog_capacity {
918            if let Some(evicted) = self.changelog.pop_front() {
919                self.floor_version = evicted.version;
920            }
921        }
922    }
923}
924
925impl Default for EntityGraph {
926    fn default() -> Self {
927        Self::new()
928    }
929}
930
931/// A node in a hierarchy tree produced by [`EntityGraph::hierarchy_tree`].
932#[derive(Debug, Clone)]
933pub struct HierarchyNode {
934    pub entity: HDict,
935    pub children: Vec<HierarchyNode>,
936    pub depth: usize,
937}
938
939/// Extract the ref value string from an entity's `id` tag.
940fn extract_ref_val(entity: &HDict) -> Result<String, GraphError> {
941    match entity.get("id") {
942        Some(Kind::Ref(r)) => Ok(r.val.clone()),
943        Some(_) => Err(GraphError::InvalidId),
944        None => Err(GraphError::MissingId),
945    }
946}
947
948/// Priority-ordered list of marker tags from most specific to least specific.
949/// The first match wins.
950const CLASSIFY_PRIORITY: &[&str] = &[
951    // Point subtypes
952    "sensor", "cmd", "sp", // Equipment subtypes
953    "ahu", "vav", "boiler", "chiller", "meter", // Base categories
954    "point", "equip", // Space types
955    "room", "floor", "zone", "space", // Site
956    "site",  // Other well-known
957    "weather", "device", "network",
958];
959
960/// Classify an entity by returning the most specific recognized marker tag.
961fn classify_entity(entity: &HDict) -> Option<String> {
962    for &tag in CLASSIFY_PRIORITY {
963        if entity.has(tag) {
964            return Some(tag.to_string());
965        }
966    }
967    None
968}
969
970/// Compute a candidate bitmap from a filter AST using the tag bitmap index
971/// and value index. Returns `None` when the filter cannot be accelerated
972/// (caller should fall back to a full scan).
973fn bitmap_candidates(
974    ast: &FilterNode,
975    tag_index: &TagBitmapIndex,
976    value_index: &ValueIndex,
977    universe: &RoaringBitmap,
978) -> Option<RoaringBitmap> {
979    match ast {
980        FilterNode::Has(path) => {
981            if path.is_single() {
982                tag_index.has_tag(&path.0[0]).cloned()
983            } else {
984                None
985            }
986        }
987        FilterNode::Missing(path) => {
988            if path.is_single() {
989                tag_index
990                    .has_tag(&path.0[0])
991                    .map(|bm| TagBitmapIndex::negate(bm, universe))
992            } else {
993                None
994            }
995        }
996        FilterNode::Cmp { path, op, val } => {
997            if path.is_single() && value_index.has_index(&path.0[0]) {
998                let field = &path.0[0];
999                let ids = match op {
1000                    CmpOp::Eq => value_index.eq_lookup(field, val),
1001                    CmpOp::Ne => value_index.ne_lookup(field, val),
1002                    CmpOp::Lt => value_index.lt_lookup(field, val),
1003                    CmpOp::Le => value_index.le_lookup(field, val),
1004                    CmpOp::Gt => value_index.gt_lookup(field, val),
1005                    CmpOp::Ge => value_index.ge_lookup(field, val),
1006                };
1007                let mut bm = RoaringBitmap::new();
1008                for id in ids {
1009                    bm.insert(id as u32);
1010                }
1011                Some(bm)
1012            } else {
1013                None
1014            }
1015        }
1016        FilterNode::And(left, right) => {
1017            let l = bitmap_candidates(left, tag_index, value_index, universe);
1018            let r = bitmap_candidates(right, tag_index, value_index, universe);
1019            match (l, r) {
1020                (Some(a), Some(b)) => Some(a & b),
1021                (Some(a), None) => Some(a),
1022                (None, Some(b)) => Some(b),
1023                (None, None) => None,
1024            }
1025        }
1026        FilterNode::Or(left, right) => {
1027            let l = bitmap_candidates(left, tag_index, value_index, universe);
1028            let r = bitmap_candidates(right, tag_index, value_index, universe);
1029            match (l, r) {
1030                (Some(a), Some(b)) => Some(a | b),
1031                // Cannot accelerate OR when one side is unknown (it could match anything).
1032                _ => None,
1033            }
1034        }
1035        FilterNode::SpecMatch(_) => None,
1036    }
1037}
1038
1039#[cfg(test)]
1040mod tests {
1041    use super::*;
1042    use crate::kinds::Number;
1043
1044    fn make_site(id: &str) -> HDict {
1045        let mut d = HDict::new();
1046        d.set("id", Kind::Ref(HRef::from_val(id)));
1047        d.set("site", Kind::Marker);
1048        d.set("dis", Kind::Str(format!("Site {id}")));
1049        d.set(
1050            "area",
1051            Kind::Number(Number::new(4500.0, Some("ft\u{00b2}".into()))),
1052        );
1053        d
1054    }
1055
1056    fn make_equip(id: &str, site_ref: &str) -> HDict {
1057        let mut d = HDict::new();
1058        d.set("id", Kind::Ref(HRef::from_val(id)));
1059        d.set("equip", Kind::Marker);
1060        d.set("dis", Kind::Str(format!("Equip {id}")));
1061        d.set("siteRef", Kind::Ref(HRef::from_val(site_ref)));
1062        d
1063    }
1064
1065    fn make_point(id: &str, equip_ref: &str) -> HDict {
1066        let mut d = HDict::new();
1067        d.set("id", Kind::Ref(HRef::from_val(id)));
1068        d.set("point", Kind::Marker);
1069        d.set("sensor", Kind::Marker);
1070        d.set("temp", Kind::Marker);
1071        d.set("dis", Kind::Str(format!("Point {id}")));
1072        d.set("equipRef", Kind::Ref(HRef::from_val(equip_ref)));
1073        d.set(
1074            "curVal",
1075            Kind::Number(Number::new(72.5, Some("\u{00b0}F".into()))),
1076        );
1077        d
1078    }
1079
1080    // ── Add tests ──
1081
1082    #[test]
1083    fn add_entity_with_valid_id() {
1084        let mut g = EntityGraph::new();
1085        let result = g.add(make_site("site-1"));
1086        assert!(result.is_ok());
1087        assert_eq!(result.unwrap(), "site-1");
1088        assert_eq!(g.len(), 1);
1089    }
1090
1091    #[test]
1092    fn add_entity_missing_id_fails() {
1093        let mut g = EntityGraph::new();
1094        let entity = HDict::new();
1095        let err = g.add(entity).unwrap_err();
1096        assert!(matches!(err, GraphError::MissingId));
1097    }
1098
1099    #[test]
1100    fn add_entity_non_ref_id_fails() {
1101        let mut g = EntityGraph::new();
1102        let mut entity = HDict::new();
1103        entity.set("id", Kind::Str("not-a-ref".into()));
1104        let err = g.add(entity).unwrap_err();
1105        assert!(matches!(err, GraphError::InvalidId));
1106    }
1107
1108    #[test]
1109    fn add_duplicate_ref_fails() {
1110        let mut g = EntityGraph::new();
1111        g.add(make_site("site-1")).unwrap();
1112        let err = g.add(make_site("site-1")).unwrap_err();
1113        assert!(matches!(err, GraphError::DuplicateRef(_)));
1114    }
1115
1116    // ── Get tests ──
1117
1118    #[test]
1119    fn get_existing_entity() {
1120        let mut g = EntityGraph::new();
1121        g.add(make_site("site-1")).unwrap();
1122        let entity = g.get("site-1").unwrap();
1123        assert!(entity.has("site"));
1124        assert_eq!(entity.get("dis"), Some(&Kind::Str("Site site-1".into())));
1125    }
1126
1127    #[test]
1128    fn get_missing_entity_returns_none() {
1129        let g = EntityGraph::new();
1130        assert!(g.get("nonexistent").is_none());
1131    }
1132
1133    // ── Update tests ──
1134
1135    #[test]
1136    fn update_merges_changes() {
1137        let mut g = EntityGraph::new();
1138        g.add(make_site("site-1")).unwrap();
1139
1140        let mut changes = HDict::new();
1141        changes.set("dis", Kind::Str("Updated Site".into()));
1142        changes.set("geoCity", Kind::Str("Richmond".into()));
1143        g.update("site-1", changes).unwrap();
1144
1145        let entity = g.get("site-1").unwrap();
1146        assert_eq!(entity.get("dis"), Some(&Kind::Str("Updated Site".into())));
1147        assert_eq!(entity.get("geoCity"), Some(&Kind::Str("Richmond".into())));
1148        assert!(entity.has("site")); // unchanged
1149    }
1150
1151    #[test]
1152    fn update_missing_entity_fails() {
1153        let mut g = EntityGraph::new();
1154        let mut changes = HDict::new();
1155        changes.set("dis", Kind::Str("test".into()));
1156        let err = g.update("nonexistent", changes).unwrap_err();
1157        assert!(matches!(err, GraphError::NotFound(_)));
1158    }
1159
1160    // ── Remove tests ──
1161
1162    #[test]
1163    fn remove_entity() {
1164        let mut g = EntityGraph::new();
1165        g.add(make_site("site-1")).unwrap();
1166        let removed = g.remove("site-1").unwrap();
1167        assert!(removed.has("site"));
1168        assert!(g.get("site-1").is_none());
1169        assert_eq!(g.len(), 0);
1170    }
1171
1172    #[test]
1173    fn remove_missing_entity_fails() {
1174        let mut g = EntityGraph::new();
1175        let err = g.remove("nonexistent").unwrap_err();
1176        assert!(matches!(err, GraphError::NotFound(_)));
1177    }
1178
1179    #[test]
1180    fn id_freelist_recycles_removed_ids() {
1181        let mut g = EntityGraph::new();
1182
1183        // Add 3 entities: IDs 0, 1, 2
1184        for i in 0..3 {
1185            let mut e = HDict::new();
1186            e.set("id", Kind::Ref(HRef::from_val(format!("e-{i}"))));
1187            g.add(e).unwrap();
1188        }
1189
1190        // Remove entity 1 (frees ID 1)
1191        g.remove("e-1").unwrap();
1192
1193        // Add a new entity — should reuse ID 1, not allocate ID 3
1194        let mut e = HDict::new();
1195        e.set("id", Kind::Ref(HRef::from_val("e-new")));
1196        g.add(e).unwrap();
1197
1198        // Graph should have 3 entities and next_id should still be 3
1199        assert_eq!(g.len(), 3);
1200    }
1201
1202    // ── Version / changelog tests ──
1203
1204    #[test]
1205    fn version_increments_on_mutations() {
1206        let mut g = EntityGraph::new();
1207        assert_eq!(g.version(), 0);
1208
1209        g.add(make_site("site-1")).unwrap();
1210        assert_eq!(g.version(), 1);
1211
1212        let mut changes = HDict::new();
1213        changes.set("dis", Kind::Str("Updated".into()));
1214        g.update("site-1", changes).unwrap();
1215        assert_eq!(g.version(), 2);
1216
1217        g.remove("site-1").unwrap();
1218        assert_eq!(g.version(), 3);
1219    }
1220
1221    #[test]
1222    fn changelog_records_add_update_remove() {
1223        let mut g = EntityGraph::new();
1224        g.add(make_site("site-1")).unwrap();
1225        let mut update_changes = HDict::new();
1226        update_changes.set("dis", Kind::Str("Updated".into()));
1227        g.update("site-1", update_changes).unwrap();
1228        g.remove("site-1").unwrap();
1229
1230        let changes = g.changes_since(0).unwrap();
1231        assert_eq!(changes.len(), 3);
1232
1233        // Add: has new, no old, no deltas.
1234        assert_eq!(changes[0].op, DiffOp::Add);
1235        assert_eq!(changes[0].ref_val, "site-1");
1236        assert!(changes[0].old.is_none());
1237        assert!(changes[0].new.is_some());
1238        assert!(changes[0].changed_tags.is_none());
1239
1240        // Update: has deltas, no old/new.
1241        assert_eq!(changes[1].op, DiffOp::Update);
1242        assert!(changes[1].old.is_none());
1243        assert!(changes[1].new.is_none());
1244        assert!(changes[1].changed_tags.is_some());
1245        assert!(changes[1].previous_tags.is_some());
1246
1247        // Remove: has old, no new, no deltas.
1248        assert_eq!(changes[2].op, DiffOp::Remove);
1249        assert!(changes[2].old.is_some());
1250        assert!(changes[2].new.is_none());
1251        assert!(changes[2].changed_tags.is_none());
1252    }
1253
1254    #[test]
1255    fn changes_since_returns_subset() {
1256        let mut g = EntityGraph::new();
1257        g.add(make_site("site-1")).unwrap(); // v1
1258        g.add(make_site("site-2")).unwrap(); // v2
1259        g.add(make_site("site-3")).unwrap(); // v3
1260
1261        let since_v2 = g.changes_since(2).unwrap();
1262        assert_eq!(since_v2.len(), 1);
1263        assert_eq!(since_v2[0].ref_val, "site-3");
1264    }
1265
1266    #[test]
1267    fn configurable_changelog_capacity() {
1268        let mut g = EntityGraph::with_changelog_capacity(3);
1269        assert_eq!(g.changelog_capacity(), 3);
1270
1271        // Add 5 entities — first 2 should be evicted from changelog.
1272        for i in 0..5 {
1273            g.add(make_site(&format!("site-{i}"))).unwrap();
1274        }
1275
1276        assert_eq!(g.version(), 5);
1277        assert_eq!(g.floor_version(), 2); // v1 and v2 evicted
1278
1279        // Can still get changes from v2 onwards.
1280        let changes = g.changes_since(2).unwrap();
1281        assert_eq!(changes.len(), 3); // v3, v4, v5
1282
1283        // Requesting from v1 (evicted) should return ChangelogGap.
1284        let gap = g.changes_since(1).unwrap_err();
1285        assert_eq!(gap.subscriber_version, 1);
1286        assert_eq!(gap.floor_version, 2);
1287    }
1288
1289    #[test]
1290    fn changelog_gap_on_version_zero_after_eviction() {
1291        let mut g = EntityGraph::with_changelog_capacity(2);
1292        for i in 0..4 {
1293            g.add(make_site(&format!("site-{i}"))).unwrap();
1294        }
1295
1296        // Requesting since v0 after evictions should return gap.
1297        let gap = g.changes_since(0).unwrap_err();
1298        assert_eq!(gap.subscriber_version, 0);
1299        assert!(gap.floor_version > 0);
1300    }
1301
1302    #[test]
1303    fn no_gap_when_capacity_sufficient() {
1304        let mut g = EntityGraph::with_changelog_capacity(100);
1305        for i in 0..50 {
1306            g.add(make_site(&format!("site-{i}"))).unwrap();
1307        }
1308        assert_eq!(g.floor_version(), 0);
1309        let changes = g.changes_since(0).unwrap();
1310        assert_eq!(changes.len(), 50);
1311    }
1312
1313    #[test]
1314    fn changelog_entries_have_timestamps() {
1315        let mut g = EntityGraph::new();
1316        g.add(make_site("site-1")).unwrap();
1317        let mut ts_changes = HDict::new();
1318        ts_changes.set("dis", Kind::Str("Updated".into()));
1319        g.update("site-1", ts_changes).unwrap();
1320        g.remove("site-1").unwrap();
1321
1322        let changes = g.changes_since(0).unwrap();
1323        for diff in &changes {
1324            assert!(diff.timestamp > 0, "timestamp should be positive");
1325        }
1326        // Timestamps should be non-decreasing.
1327        for pair in changes.windows(2) {
1328            assert!(pair[1].timestamp >= pair[0].timestamp);
1329        }
1330    }
1331
1332    #[test]
1333    fn update_diff_carries_delta_tags() {
1334        let mut g = EntityGraph::new();
1335        let mut site = HDict::new();
1336        site.set("id", Kind::Ref(HRef::from_val("site-1")));
1337        site.set("site", Kind::Marker);
1338        site.set("dis", Kind::Str("Original".into()));
1339        site.set("area", Kind::Number(Number::unitless(1000.0)));
1340        g.add(site).unwrap();
1341
1342        let mut changes = HDict::new();
1343        changes.set("dis", Kind::Str("Updated".into()));
1344        g.update("site-1", changes).unwrap();
1345
1346        let diffs = g.changes_since(1).unwrap(); // skip the Add
1347        assert_eq!(diffs.len(), 1);
1348        let diff = &diffs[0];
1349        assert_eq!(diff.op, DiffOp::Update);
1350
1351        // old/new should be None for Update (delta only).
1352        assert!(diff.old.is_none());
1353        assert!(diff.new.is_none());
1354
1355        // changed_tags has the new value.
1356        let ct = diff.changed_tags.as_ref().unwrap();
1357        assert_eq!(ct.get("dis"), Some(&Kind::Str("Updated".into())));
1358        assert!(ct.get("area").is_none()); // unchanged tag not included
1359
1360        // previous_tags has the old value.
1361        let pt = diff.previous_tags.as_ref().unwrap();
1362        assert_eq!(pt.get("dis"), Some(&Kind::Str("Original".into())));
1363    }
1364
1365    // ── Container tests ──
1366
1367    #[test]
1368    fn contains_check() {
1369        let mut g = EntityGraph::new();
1370        g.add(make_site("site-1")).unwrap();
1371        assert!(g.contains("site-1"));
1372        assert!(!g.contains("site-2"));
1373    }
1374
1375    #[test]
1376    fn len_and_is_empty() {
1377        let mut g = EntityGraph::new();
1378        assert!(g.is_empty());
1379        assert_eq!(g.len(), 0);
1380
1381        g.add(make_site("site-1")).unwrap();
1382        assert!(!g.is_empty());
1383        assert_eq!(g.len(), 1);
1384    }
1385
1386    // ── Query tests ──
1387
1388    #[test]
1389    fn read_with_simple_has_filter() {
1390        let mut g = EntityGraph::new();
1391        g.add(make_site("site-1")).unwrap();
1392        g.add(make_equip("equip-1", "site-1")).unwrap();
1393
1394        let results = g.read_all("site", 0).unwrap();
1395        assert_eq!(results.len(), 1);
1396        assert!(results[0].has("site"));
1397    }
1398
1399    #[test]
1400    fn read_with_comparison_filter() {
1401        let mut g = EntityGraph::new();
1402        g.add(make_point("pt-1", "equip-1")).unwrap();
1403
1404        let results = g.read_all("curVal > 70\u{00b0}F", 0).unwrap();
1405        assert_eq!(results.len(), 1);
1406    }
1407
1408    #[test]
1409    fn read_with_and_filter() {
1410        let mut g = EntityGraph::new();
1411        g.add(make_point("pt-1", "equip-1")).unwrap();
1412        g.add(make_equip("equip-1", "site-1")).unwrap();
1413
1414        let results = g.read_all("point and sensor", 0).unwrap();
1415        assert_eq!(results.len(), 1);
1416    }
1417
1418    #[test]
1419    fn read_with_or_filter() {
1420        let mut g = EntityGraph::new();
1421        g.add(make_site("site-1")).unwrap();
1422        g.add(make_equip("equip-1", "site-1")).unwrap();
1423
1424        let results = g.read_all("site or equip", 0).unwrap();
1425        assert_eq!(results.len(), 2);
1426    }
1427
1428    #[test]
1429    fn read_limit_parameter_works() {
1430        let mut g = EntityGraph::new();
1431        g.add(make_site("site-1")).unwrap();
1432        g.add(make_site("site-2")).unwrap();
1433        g.add(make_site("site-3")).unwrap();
1434
1435        let results = g.read_all("site", 2).unwrap();
1436        assert_eq!(results.len(), 2);
1437    }
1438
1439    #[test]
1440    fn read_returns_grid() {
1441        let mut g = EntityGraph::new();
1442        g.add(make_site("site-1")).unwrap();
1443        g.add(make_site("site-2")).unwrap();
1444
1445        let grid = g.read("site", 0).unwrap();
1446        assert_eq!(grid.len(), 2);
1447        assert!(grid.col("site").is_some());
1448        assert!(grid.col("id").is_some());
1449    }
1450
1451    #[test]
1452    fn read_invalid_filter() {
1453        let g = EntityGraph::new();
1454        let err = g.read("!!!", 0).unwrap_err();
1455        assert!(matches!(err, GraphError::Filter(_)));
1456    }
1457
1458    #[test]
1459    fn query_cache_returns_same_results() {
1460        let mut g = EntityGraph::new();
1461        g.add(make_site("site-1")).unwrap();
1462        g.add(make_equip("equip-1", "site-1")).unwrap();
1463        g.add(make_point("pt-1", "equip-1")).unwrap();
1464
1465        // First call populates cache
1466        let results1 = g.read_all("site", 0).unwrap();
1467        assert_eq!(results1.len(), 1);
1468
1469        // Second call should hit cache and return same results
1470        let results2 = g.read_all("site", 0).unwrap();
1471        assert_eq!(results2.len(), 1);
1472        assert_eq!(results1[0].get("id"), results2[0].get("id"));
1473    }
1474
1475    #[test]
1476    fn query_cache_invalidated_by_mutation() {
1477        let mut g = EntityGraph::new();
1478        g.add(make_site("site-1")).unwrap();
1479
1480        let results = g.read_all("site", 0).unwrap();
1481        assert_eq!(results.len(), 1);
1482
1483        // Add another site — cache should be invalidated by version bump
1484        g.add(make_site("site-2")).unwrap();
1485
1486        let results = g.read_all("site", 0).unwrap();
1487        assert_eq!(results.len(), 2);
1488    }
1489
1490    #[test]
1491    fn query_cache_capacity_scales_with_entity_count() {
1492        let mut g = EntityGraph::new();
1493        // Default cache should start at 256
1494        assert_eq!(g.query_cache_capacity(), 256);
1495        for i in 0..500 {
1496            let mut e = HDict::new();
1497            e.set("id", Kind::Ref(HRef::from_val(format!("e-{i}"))));
1498            e.set("site", Kind::Marker);
1499            g.add(e).unwrap();
1500        }
1501        // For 500 entities: (500/100).clamp(256, 1024) = 256 (still minimum)
1502        assert!(g.query_cache_capacity() >= 256);
1503    }
1504
1505    // ── Ref traversal tests ──
1506
1507    #[test]
1508    fn refs_from_returns_targets() {
1509        let mut g = EntityGraph::new();
1510        g.add(make_site("site-1")).unwrap();
1511        g.add(make_equip("equip-1", "site-1")).unwrap();
1512
1513        let targets = g.refs_from("equip-1", None);
1514        assert_eq!(targets, vec!["site-1".to_string()]);
1515    }
1516
1517    #[test]
1518    fn refs_to_returns_sources() {
1519        let mut g = EntityGraph::new();
1520        g.add(make_site("site-1")).unwrap();
1521        g.add(make_equip("equip-1", "site-1")).unwrap();
1522        g.add(make_equip("equip-2", "site-1")).unwrap();
1523
1524        let mut sources = g.refs_to("site-1", None);
1525        sources.sort();
1526        assert_eq!(sources.len(), 2);
1527    }
1528
1529    #[test]
1530    fn type_filtered_ref_queries() {
1531        let mut g = EntityGraph::new();
1532        g.add(make_site("site-1")).unwrap();
1533        g.add(make_equip("equip-1", "site-1")).unwrap();
1534
1535        let targets = g.refs_from("equip-1", Some("siteRef"));
1536        assert_eq!(targets, vec!["site-1".to_string()]);
1537
1538        let targets = g.refs_from("equip-1", Some("equipRef"));
1539        assert!(targets.is_empty());
1540    }
1541
1542    #[test]
1543    fn refs_from_nonexistent_entity() {
1544        let g = EntityGraph::new();
1545        assert!(g.refs_from("nonexistent", None).is_empty());
1546    }
1547
1548    #[test]
1549    fn refs_to_nonexistent_entity() {
1550        let g = EntityGraph::new();
1551        assert!(g.refs_to("nonexistent", None).is_empty());
1552    }
1553
1554    // ── Serialization tests ──
1555
1556    #[test]
1557    fn from_grid_round_trip() {
1558        let mut g = EntityGraph::new();
1559        g.add(make_site("site-1")).unwrap();
1560        g.add(make_equip("equip-1", "site-1")).unwrap();
1561
1562        let grid = g.to_grid("site or equip").unwrap();
1563        assert_eq!(grid.len(), 2);
1564
1565        let g2 = EntityGraph::from_grid(&grid, None).unwrap();
1566        assert_eq!(g2.len(), 2);
1567        assert!(g2.contains("site-1"));
1568        assert!(g2.contains("equip-1"));
1569    }
1570
1571    #[test]
1572    fn to_grid_empty_result() {
1573        let g = EntityGraph::new();
1574        let grid = g.to_grid("site").unwrap();
1575        assert!(grid.is_empty());
1576    }
1577
1578    // ── Update re-indexes correctly ──
1579
1580    #[test]
1581    fn update_reindexes_tags() {
1582        let mut g = EntityGraph::new();
1583        g.add(make_site("site-1")).unwrap();
1584
1585        // Should find the site with "site" filter.
1586        assert_eq!(g.read_all("site", 0).unwrap().len(), 1);
1587
1588        // Remove the "site" marker via update.
1589        let mut changes = HDict::new();
1590        changes.set("site", Kind::Remove);
1591        g.update("site-1", changes).unwrap();
1592
1593        // Should no longer match "site" filter.
1594        assert_eq!(g.read_all("site", 0).unwrap().len(), 0);
1595    }
1596
1597    #[test]
1598    fn update_reindexes_refs() {
1599        let mut g = EntityGraph::new();
1600        g.add(make_site("site-1")).unwrap();
1601        g.add(make_site("site-2")).unwrap();
1602        g.add(make_equip("equip-1", "site-1")).unwrap();
1603
1604        // Initially equip-1 points to site-1.
1605        assert_eq!(g.refs_from("equip-1", None), vec!["site-1".to_string()]);
1606
1607        // Move equip-1 to site-2.
1608        let mut changes = HDict::new();
1609        changes.set("siteRef", Kind::Ref(HRef::from_val("site-2")));
1610        g.update("equip-1", changes).unwrap();
1611
1612        assert_eq!(g.refs_from("equip-1", None), vec!["site-2".to_string()]);
1613        assert!(g.refs_to("site-1", None).is_empty());
1614    }
1615
1616    #[test]
1617    fn update_delta_indexing_preserves_unchanged_tags() {
1618        let mut g = EntityGraph::new();
1619        let mut e = HDict::new();
1620        e.set("id", Kind::Ref(HRef::from_val("p-1")));
1621        e.set("point", Kind::Marker);
1622        e.set("sensor", Kind::Marker);
1623        e.set("curVal", Kind::Number(Number::unitless(72.0)));
1624        e.set("siteRef", Kind::Ref(HRef::from_val("site-1")));
1625        g.add(e).unwrap();
1626
1627        // Update only curVal — point, sensor, siteRef should remain indexed.
1628        let mut changes = HDict::new();
1629        changes.set("curVal", Kind::Number(Number::unitless(75.0)));
1630        g.update("p-1", changes).unwrap();
1631
1632        // Verify tag bitmap still has point and sensor.
1633        let results = g.read_all("point and sensor", 0).unwrap();
1634        assert_eq!(results.len(), 1);
1635
1636        // Verify ref adjacency still works.
1637        let refs = g.refs_from("p-1", Some("siteRef"));
1638        assert_eq!(refs, vec!["site-1".to_string()]);
1639
1640        // Verify value index has the new curVal.
1641        let results = g.read_all("curVal >= 74", 0).unwrap();
1642        assert_eq!(results.len(), 1);
1643    }
1644
1645    // ── Dangling ref validation ──
1646
1647    #[test]
1648    fn validate_detects_dangling_refs() {
1649        let mut g = EntityGraph::new();
1650        g.add(make_site("site-1")).unwrap();
1651        // equip-1 has siteRef pointing to "site-1" (exists) — no issue
1652        g.add(make_equip("equip-1", "site-1")).unwrap();
1653        // equip-2 has siteRef pointing to "site-999" (does not exist) — dangling
1654        g.add(make_equip("equip-2", "site-999")).unwrap();
1655
1656        let issues = g.validate();
1657        assert!(!issues.is_empty());
1658
1659        let dangling: Vec<_> = issues
1660            .iter()
1661            .filter(|i| i.issue_type == "dangling_ref")
1662            .collect();
1663        assert_eq!(dangling.len(), 1);
1664        assert_eq!(dangling[0].entity.as_deref(), Some("equip-2"));
1665        assert!(dangling[0].detail.contains("site-999"));
1666        assert!(dangling[0].detail.contains("siteRef"));
1667    }
1668
1669    // ── Empty filter exports all ──
1670
1671    #[test]
1672    fn to_grid_empty_filter_exports_all() {
1673        let mut g = EntityGraph::new();
1674        g.add(make_site("site-1")).unwrap();
1675        g.add(make_equip("equip-1", "site-1")).unwrap();
1676        g.add(make_point("pt-1", "equip-1")).unwrap();
1677
1678        let grid = g.to_grid("").unwrap();
1679        assert_eq!(grid.len(), 3);
1680        assert!(grid.col("id").is_some());
1681    }
1682
1683    // ── from_grid skips rows without id ──
1684
1685    #[test]
1686    fn changelog_bounded_to_max_size() {
1687        // Use a small capacity to test bounding without 50K iterations.
1688        let mut graph = EntityGraph::with_changelog_capacity(100);
1689        for i in 0..200 {
1690            let mut d = HDict::new();
1691            d.set("id", Kind::Ref(HRef::from_val(format!("e{i}"))));
1692            d.set("dis", Kind::Str(format!("Entity {i}")));
1693            graph.add(d).unwrap();
1694        }
1695        // Changelog should be capped at capacity.
1696        // Requesting since floor_version should succeed.
1697        let floor = graph.floor_version();
1698        assert!(floor > 0);
1699        let changes = graph.changes_since(floor).unwrap();
1700        assert!(changes.len() <= 100);
1701        // Latest changes should still be present.
1702        assert!(graph.changes_since(199).unwrap().len() <= 1);
1703        // Old versions should return gap.
1704        assert!(graph.changes_since(0).is_err());
1705    }
1706
1707    #[test]
1708    fn from_grid_skips_rows_without_id() {
1709        let cols = vec![HCol::new("id"), HCol::new("dis"), HCol::new("site")];
1710
1711        let mut row_with_id = HDict::new();
1712        row_with_id.set("id", Kind::Ref(HRef::from_val("site-1")));
1713        row_with_id.set("site", Kind::Marker);
1714        row_with_id.set("dis", Kind::Str("Has ID".into()));
1715
1716        // Row with string id (not a Ref) — should be skipped.
1717        let mut row_bad_id = HDict::new();
1718        row_bad_id.set("id", Kind::Str("not-a-ref".into()));
1719        row_bad_id.set("dis", Kind::Str("Bad ID".into()));
1720
1721        // Row with no id at all — should be skipped.
1722        let mut row_no_id = HDict::new();
1723        row_no_id.set("dis", Kind::Str("No ID".into()));
1724
1725        let grid = HGrid::from_parts(HDict::new(), cols, vec![row_with_id, row_bad_id, row_no_id]);
1726        let g = EntityGraph::from_grid(&grid, None).unwrap();
1727
1728        assert_eq!(g.len(), 1);
1729        assert!(g.contains("site-1"));
1730    }
1731
1732    fn build_hierarchy_graph() -> EntityGraph {
1733        let mut g = EntityGraph::new();
1734        g.add(make_site("s1")).unwrap();
1735        g.add(make_site("s2")).unwrap();
1736        g.add(make_equip("e1", "s1")).unwrap();
1737        g.add(make_equip("e2", "s1")).unwrap();
1738        g.add(make_equip("e3", "s2")).unwrap();
1739        g.add(make_point("p1", "e1")).unwrap();
1740        g.add(make_point("p2", "e1")).unwrap();
1741        g.add(make_point("p3", "e2")).unwrap();
1742        g
1743    }
1744
1745    // ── Haystack Hierarchy Helper tests ──
1746
1747    #[test]
1748    fn ref_chain_walks_equip_to_site() {
1749        let g = build_hierarchy_graph();
1750        // p1 → equipRef=e1 → siteRef=s1
1751        let chain = g.ref_chain("p1", &["equipRef", "siteRef"]);
1752        assert_eq!(chain.len(), 2);
1753        assert_eq!(chain[0].id().unwrap().val, "e1");
1754        assert_eq!(chain[1].id().unwrap().val, "s1");
1755    }
1756
1757    #[test]
1758    fn ref_chain_stops_on_missing_tag() {
1759        let g = build_hierarchy_graph();
1760        // e1 has siteRef but no spaceRef — should return just the site.
1761        let chain = g.ref_chain("e1", &["siteRef", "spaceRef"]);
1762        assert_eq!(chain.len(), 1);
1763        assert_eq!(chain[0].id().unwrap().val, "s1");
1764    }
1765
1766    #[test]
1767    fn ref_chain_empty_for_nonexistent() {
1768        let g = build_hierarchy_graph();
1769        let chain = g.ref_chain("nonexistent", &["equipRef"]);
1770        assert!(chain.is_empty());
1771    }
1772
1773    #[test]
1774    fn site_for_returns_site_itself() {
1775        let g = build_hierarchy_graph();
1776        let site = g.site_for("s1").unwrap();
1777        assert_eq!(site.id().unwrap().val, "s1");
1778    }
1779
1780    #[test]
1781    fn site_for_walks_from_point() {
1782        let g = build_hierarchy_graph();
1783        // p1 → equipRef=e1 → siteRef=s1
1784        let site = g.site_for("p1").unwrap();
1785        assert_eq!(site.id().unwrap().val, "s1");
1786    }
1787
1788    #[test]
1789    fn site_for_walks_from_equip() {
1790        let g = build_hierarchy_graph();
1791        let site = g.site_for("e1").unwrap();
1792        assert_eq!(site.id().unwrap().val, "s1");
1793    }
1794
1795    #[test]
1796    fn children_returns_direct_refs() {
1797        let g = build_hierarchy_graph();
1798        let kids = g.children("s1");
1799        // e1 and e2 reference s1 via siteRef.
1800        let ids: Vec<&str> = kids.iter().map(|e| e.id().unwrap().val.as_str()).collect();
1801        assert!(ids.contains(&"e1"));
1802        assert!(ids.contains(&"e2"));
1803    }
1804
1805    #[test]
1806    fn equip_points_returns_points_only() {
1807        let g = build_hierarchy_graph();
1808        let points = g.equip_points("e1", None).unwrap();
1809        assert_eq!(points.len(), 2); // p1, p2
1810        for p in &points {
1811            assert!(p.has("point"));
1812        }
1813    }
1814
1815    #[test]
1816    fn equip_points_with_filter() {
1817        let mut g = build_hierarchy_graph();
1818        // Existing points already have temp marker. Add one with flow instead.
1819        let mut pf = HDict::new();
1820        pf.set("id", Kind::Ref(HRef::from_val("pf")));
1821        pf.set("point", Kind::Marker);
1822        pf.set("flow", Kind::Marker);
1823        pf.set("equipRef", Kind::Ref(HRef::from_val("e1")));
1824        g.add(pf).unwrap();
1825
1826        let temp_points = g.equip_points("e1", Some("temp")).unwrap();
1827        // Only p1 and p2 have temp (the existing ones).
1828        assert_eq!(temp_points.len(), 2);
1829        assert!(temp_points.iter().all(|p| p.has("temp")));
1830    }
1831
1832    // ── Hierarchy tree tests ──
1833
1834    #[test]
1835    fn hierarchy_tree_from_site() {
1836        let g = build_hierarchy_graph();
1837        let tree = g.hierarchy_tree("s1", 10).unwrap();
1838        assert_eq!(tree.depth, 0);
1839        assert_eq!(tree.entity.id().unwrap().val, "s1");
1840        // s1 has children e1, e2
1841        assert_eq!(tree.children.len(), 2);
1842        let child_ids: Vec<String> = tree
1843            .children
1844            .iter()
1845            .map(|c| c.entity.id().unwrap().val.clone())
1846            .collect();
1847        assert!(child_ids.contains(&"e1".to_string()));
1848        assert!(child_ids.contains(&"e2".to_string()));
1849        // e1 has children p1, p2
1850        let e1_node = tree
1851            .children
1852            .iter()
1853            .find(|c| c.entity.id().unwrap().val == "e1")
1854            .unwrap();
1855        assert_eq!(e1_node.children.len(), 2);
1856        let point_ids: Vec<String> = e1_node
1857            .children
1858            .iter()
1859            .map(|c| c.entity.id().unwrap().val.clone())
1860            .collect();
1861        assert!(point_ids.contains(&"p1".to_string()));
1862        assert!(point_ids.contains(&"p2".to_string()));
1863    }
1864
1865    #[test]
1866    fn hierarchy_tree_max_depth() {
1867        let g = build_hierarchy_graph();
1868        // depth 0 = root only
1869        let tree = g.hierarchy_tree("s1", 0).unwrap();
1870        assert!(tree.children.is_empty());
1871        // depth 1 = root + direct children
1872        let tree = g.hierarchy_tree("s1", 1).unwrap();
1873        assert_eq!(tree.children.len(), 2);
1874        assert!(tree.children.iter().all(|c| c.children.is_empty()));
1875    }
1876
1877    #[test]
1878    fn hierarchy_tree_missing_root() {
1879        let g = build_hierarchy_graph();
1880        assert!(g.hierarchy_tree("nonexistent", 10).is_none());
1881    }
1882
1883    // ── Classify tests ──
1884
1885    #[test]
1886    fn classify_site() {
1887        let g = build_hierarchy_graph();
1888        assert_eq!(g.classify("s1").unwrap(), "site");
1889    }
1890
1891    #[test]
1892    fn classify_equip() {
1893        let mut g = EntityGraph::new();
1894        let mut d = HDict::new();
1895        d.set("id", Kind::Ref(HRef::from_val("ahu-1")));
1896        d.set("equip", Kind::Marker);
1897        d.set("ahu", Kind::Marker);
1898        g.add(d).unwrap();
1899        assert_eq!(g.classify("ahu-1").unwrap(), "ahu");
1900    }
1901
1902    #[test]
1903    fn classify_point() {
1904        let g = build_hierarchy_graph();
1905        // Points have point + sensor + temp markers; sensor is most specific.
1906        assert_eq!(g.classify("p1").unwrap(), "sensor");
1907    }
1908
1909    #[test]
1910    fn classify_unknown() {
1911        let mut g = EntityGraph::new();
1912        let mut d = HDict::new();
1913        d.set("id", Kind::Ref(HRef::from_val("x1")));
1914        d.set("custom", Kind::Marker);
1915        g.add(d).unwrap();
1916        assert!(g.classify("x1").is_none());
1917    }
1918
1919    #[test]
1920    fn changes_since_binary_search_equivalence() {
1921        let mut g = EntityGraph::new();
1922        for i in 0..100 {
1923            let mut e = HDict::new();
1924            e.set("id", Kind::Ref(HRef::from_val(format!("e-{i}"))));
1925            e.set("site", Kind::Marker);
1926            g.add(e).unwrap();
1927        }
1928        // After 100 adds, version is 100.
1929        // changes_since(50) should return versions 51..=100 (50 entries).
1930        let changes = g.changes_since(50).unwrap();
1931        assert_eq!(changes.len(), 50);
1932        assert_eq!(changes.first().unwrap().version, 51);
1933        assert_eq!(changes.last().unwrap().version, 100);
1934    }
1935}