Skip to main content

khive_types/
entity.rs

1//! Entity substrate — graph nodes with typed properties and links.
2
3extern crate alloc;
4use alloc::collections::BTreeMap;
5use alloc::string::String;
6use alloc::vec::Vec;
7use core::fmt;
8use core::str::FromStr;
9
10use crate::{EdgeRelation, Header, Id128, Timestamp};
11
/// The closed set of 8 base kinds used to classify graph nodes.
///
/// Governed subtype values belong in `Entity::entity_type`. `properties` stay
/// plain metadata and must not carry ontology type strings.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Serialize, serde::Deserialize),
    serde(rename_all = "snake_case")
)]
pub enum EntityKind {
    /// Algorithms, techniques, architectures, theories, models, research gaps.
    ///
    /// This is the default kind and also serves as the residual bucket.
    #[default]
    Concept,
    /// Papers, preprints, technical reports, blog posts, books.
    ///
    /// Has: title, authors, year, venue, DOI/URL.
    Document,
    /// Benchmarks, corpora, evaluation sets.
    ///
    /// Has: task type, size, metrics, license.
    Dataset,
    /// Codebases, libraries, tools, frameworks.
    ///
    /// Has: language, repo URL, license.
    Project,
    /// Researchers, engineers, authors.
    Person,
    /// Labs, companies, institutions.
    Org,
    /// Built artifacts: binaries, model checkpoints, Docker images, packages.
    Artifact,
    /// Running or deployable services: APIs, hosted endpoints, SaaS products.
    Service,
}
42
43impl EntityKind {
44    /// All 8 canonical entity kinds in taxonomy-table order.
45    pub const ALL: [Self; 8] = [
46        Self::Concept,
47        Self::Document,
48        Self::Dataset,
49        Self::Project,
50        Self::Person,
51        Self::Org,
52        Self::Artifact,
53        Self::Service,
54    ];
55
56    /// Return the canonical lowercase string for this kind, as stored on the wire.
57    pub const fn name(self) -> &'static str {
58        match self {
59            Self::Concept => "concept",
60            Self::Document => "document",
61            Self::Dataset => "dataset",
62            Self::Project => "project",
63            Self::Person => "person",
64            Self::Org => "org",
65            Self::Artifact => "artifact",
66            Self::Service => "service",
67        }
68    }
69}
70
71impl fmt::Display for EntityKind {
72    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
73        f.write_str(self.name())
74    }
75}
76
// The 8 canonical kind strings of the closed taxonomy, in taxonomy order.
// Aliases accepted by the parser are intentionally not listed here.
const ENTITY_KIND_VALID: &[&str] = &[
    "concept",
    "document",
    "dataset",
    "project",
    "person",
    "org",
    "artifact",
    "service",
];
81
82impl FromStr for EntityKind {
83    type Err = crate::error::UnknownVariant;
84
85    /// Parse a string into an `EntityKind`.
86    ///
87    /// Accepts the 8 canonical kind names (case-insensitive) plus a set of
88    /// convenience aliases to aid human-authored DSL requests (e.g. `"paper"`
89    /// resolves to `Document`, `"repo"` to `Project`).
90    ///
91    /// **Note on subtype aliasing**: when `kind="paper"` is parsed here, only the
92    /// base `EntityKind::Document` is returned.  Callers that need to preserve the
93    /// `entity_type` subtoken must use the pack registry resolution path, which
94    /// returns both the base kind and the subtype string.  `from_str` is
95    /// intentionally base-kind-only for use in contexts where the subtype is
96    /// carried separately (e.g. `Entity.entity_type`).
97    fn from_str(s: &str) -> Result<Self, Self::Err> {
98        match s.trim().to_ascii_lowercase().as_str() {
99            "concept" => Ok(Self::Concept),
100            "document" | "doc" | "paper" => Ok(Self::Document),
101            "dataset" | "data" | "benchmark" => Ok(Self::Dataset),
102            "project" | "repo" | "crate" | "library" | "lib" => Ok(Self::Project),
103            "person" | "author" | "researcher" => Ok(Self::Person),
104            "org" | "organization" | "organisation" | "lab" | "company" => Ok(Self::Org),
105            "artifact" | "art" => Ok(Self::Artifact),
106            "service" | "svc" => Ok(Self::Service),
107            other => Err(crate::error::UnknownVariant::new(
108                "entity_kind",
109                other,
110                ENTITY_KIND_VALID,
111            )),
112        }
113    }
114}
115
116/// A graph node with a type, display name, and key-value properties.
117#[derive(Clone, Debug)]
118#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
119pub struct Entity {
120    /// Identity and namespace metadata shared by all substrate records.
121    #[cfg_attr(feature = "serde", serde(flatten))]
122    pub header: Header,
123    /// Closed base kind that classifies this entity.
124    pub kind: EntityKind,
125    /// Pack-governed subtype token (e.g. `"paper"`, `"snapshot"`). Never stored
126    /// raw in `properties` — queries compile this to `entities.entity_type = ?`.
127    pub entity_type: Option<String>,
128    /// Human-readable display name (required; must be non-empty).
129    pub name: String,
130    /// Optional long-form description of this entity.
131    pub description: Option<String>,
132    /// Arbitrary structured metadata as key-value pairs.
133    pub properties: BTreeMap<String, PropertyValue>,
134    /// Categorical labels for filtering and retrieval.
135    pub tags: Vec<String>,
136    /// Set when the entity is soft-deleted; absent means active.
137    pub deleted_at: Option<Timestamp>,
138}
139
140/// A directed, typed edge between two entities (or cross-substrate nodes).
141///
142/// `weight` must be finite and in `[0.0, 1.0]`. When the `serde` feature is
143/// enabled, deserialization rejects out-of-range or non-finite weights.
144#[derive(Clone, Debug)]
145#[cfg_attr(feature = "serde", derive(serde::Serialize))]
146#[cfg_attr(feature = "serde", serde(into = "LinkRaw"))]
147pub struct Link {
148    /// Unique edge identifier.
149    pub id: Id128,
150    /// Namespace that owns and isolates this edge.
151    pub namespace: String,
152    /// Source node identifier.
153    pub source: Id128,
154    /// Target node identifier.
155    pub target: Id128,
156    /// Closed relation type that semantically describes this edge.
157    pub relation: EdgeRelation,
158    /// Arbitrary structured metadata attached to this edge.
159    pub properties: BTreeMap<String, PropertyValue>,
160    /// Numeric edge weight in the range [0.0, 1.0]; 1.0 means definitional strength.
161    pub weight: f64,
162    /// Wall-clock time when this edge was created.
163    pub created_at: Timestamp,
164    /// Wall-clock time of the most recent update.
165    pub updated_at: Timestamp,
166    /// Set when the edge is soft-deleted; absent means active.
167    pub deleted_at: Option<Timestamp>,
168}
169
170impl Link {
171    /// Return `true` if all numeric fields carry finite, domain-valid values.
172    ///
173    /// - `weight` must be finite and in `[0.0, 1.0]`.
174    pub fn is_valid(&self) -> bool {
175        self.weight.is_finite() && self.weight >= 0.0 && self.weight <= 1.0
176    }
177}
178
/// Unvalidated, field-for-field mirror of `Link` used as its serde
/// representation.
///
/// `Link` serializes by converting into this type and deserializes by reading
/// this type first and then validating `weight` during conversion.
#[cfg(feature = "serde")]
#[derive(serde::Deserialize, serde::Serialize)]
struct LinkRaw {
    // Edge identity and owning namespace.
    id: Id128,
    namespace: String,
    // Endpoints and relation type.
    source: Id128,
    target: Id128,
    relation: EdgeRelation,
    // Free-form metadata.
    properties: BTreeMap<String, PropertyValue>,
    // Not range-checked at this layer; validated when converting to `Link`.
    weight: f64,
    // Lifecycle timestamps.
    created_at: Timestamp,
    updated_at: Timestamp,
    deleted_at: Option<Timestamp>,
}
193
#[cfg(feature = "serde")]
impl From<Link> for LinkRaw {
    /// Move every field of the link into its serde mirror, unchanged.
    ///
    /// The conversion is infallible: no validation happens in this direction.
    fn from(link: Link) -> Self {
        // Exhaustive destructuring: adding a field to `Link` without handling
        // it here becomes a compile error.
        let Link {
            id,
            namespace,
            source,
            target,
            relation,
            properties,
            weight,
            created_at,
            updated_at,
            deleted_at,
        } = link;

        Self {
            id,
            namespace,
            source,
            target,
            relation,
            properties,
            weight,
            created_at,
            updated_at,
            deleted_at,
        }
    }
}
211
#[cfg(feature = "serde")]
impl TryFrom<LinkRaw> for Link {
    type Error = String;

    /// Validate a raw link and promote it to a `Link`.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message when `weight` is non-finite (NaN or
    /// infinite) or lies outside `[0.0, 1.0]`. Finiteness is checked first, so
    /// non-finite weights always get the "must be finite" message.
    fn try_from(raw: LinkRaw) -> Result<Self, Self::Error> {
        let LinkRaw {
            id,
            namespace,
            source,
            target,
            relation,
            properties,
            weight,
            created_at,
            updated_at,
            deleted_at,
        } = raw;

        if !weight.is_finite() {
            Err(alloc::format!(
                "Link weight must be finite, got {}",
                weight
            ))
        } else if !(0.0..=1.0).contains(&weight) {
            Err(alloc::format!(
                "Link weight must be in [0.0, 1.0], got {}",
                weight
            ))
        } else {
            Ok(Link {
                id,
                namespace,
                source,
                target,
                relation,
                properties,
                weight,
                created_at,
                updated_at,
                deleted_at,
            })
        }
    }
}
243
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Link {
    /// Deserialize via `LinkRaw`, then validate.
    ///
    /// A validation failure from `Link::try_from` is surfaced as a custom
    /// deserializer error carrying the validation message.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        <LinkRaw as serde::Deserialize<'de>>::deserialize(deserializer)
            .and_then(|raw| Link::try_from(raw).map_err(serde::de::Error::custom))
    }
}
254
/// A property value as stored on entities, links, and notes.
///
/// The type is recursive: arrays and nested objects are supported so that
/// free-form JSON properties (e.g. `entity_ids[]`,
/// `alternatives_considered[]`) can be represented.
///
/// With the `serde` feature the enum uses the untagged representation, in
/// which variants are tried in declaration order when deserializing — keep
/// the variant order stable.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Serialize, serde::Deserialize),
    serde(untagged)
)]
pub enum PropertyValue {
    /// Text value.
    String(String),
    /// Signed 64-bit integer value.
    Integer(i64),
    /// 64-bit floating-point value.
    Float(f64),
    /// Boolean value.
    Boolean(bool),
    /// Ordered list of nested values.
    Array(Vec<PropertyValue>),
    /// Nested key-value map.
    Object(BTreeMap<String, PropertyValue>),
    /// Explicit absence of a value.
    Null,
}
271
272impl fmt::Display for PropertyValue {
273    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
274        match self {
275            Self::String(s) => f.write_str(s),
276            Self::Integer(n) => write!(f, "{n}"),
277            Self::Float(n) => write!(f, "{n}"),
278            Self::Boolean(b) => write!(f, "{b}"),
279            Self::Array(arr) => write!(f, "[{} items]", arr.len()),
280            Self::Object(obj) => write!(f, "{{{} keys}}", obj.len()),
281            Self::Null => f.write_str("null"),
282        }
283    }
284}
285
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Namespace, Timestamp};
    #[cfg(feature = "serde")]
    use alloc::string::ToString;

    /// Build the link fixture shared by the tests; only `weight` varies.
    fn link_with_weight(weight: f64) -> Link {
        let ts = Timestamp::from_secs(1700000000);
        Link {
            id: Id128::from_u128(100),
            namespace: "default".into(),
            source: Id128::from_u128(1),
            target: Id128::from_u128(2),
            relation: EdgeRelation::Extends,
            properties: BTreeMap::new(),
            weight,
            created_at: ts,
            updated_at: ts,
            deleted_at: None,
        }
    }

    /// Build the JSON payload shared by the serde tests; only `weight` varies.
    #[cfg(feature = "serde")]
    fn link_json(weight: f64) -> serde_json::Value {
        serde_json::json!({
            "id": "00000000-0000-0000-0000-000000000064",
            "namespace": "default",
            "source": "00000000-0000-0000-0000-000000000001",
            "target": "00000000-0000-0000-0000-000000000002",
            "relation": "extends",
            "properties": {},
            "weight": weight,
            "created_at": 1700000000000000_u64,
            "updated_at": 1700000000000000_u64,
            "deleted_at": null
        })
    }

    #[test]
    fn entity_with_properties() {
        let props = BTreeMap::from([
            (
                String::from("role"),
                PropertyValue::String(String::from("engineer")),
            ),
            (String::from("age"), PropertyValue::Integer(30)),
        ]);

        let header = Header::new(
            Id128::from_u128(1),
            Namespace::local(),
            Timestamp::from_secs(1700000000),
        );
        let entity = Entity {
            header,
            kind: EntityKind::Person,
            entity_type: Some(String::from("researcher")),
            name: String::from("Ocean"),
            description: None,
            properties: props,
            tags: Vec::new(),
            deleted_at: None,
        };

        assert_eq!(entity.kind, EntityKind::Person);
        assert_eq!(entity.kind.name(), "person");
        assert_eq!(entity.entity_type.as_deref(), Some("researcher"));
        assert_eq!(entity.properties.len(), 2);
    }

    #[test]
    fn entity_kind_default_is_concept() {
        assert_eq!(EntityKind::default(), EntityKind::Concept);
    }

    #[test]
    fn entity_kind_display_roundtrip() {
        // Display output of every kind must parse back to the same kind.
        for kind in EntityKind::ALL {
            let rendered = alloc::format!("{kind}");
            let parsed: EntityKind = rendered.parse().unwrap();
            assert_eq!(parsed, kind);
        }
    }

    #[test]
    fn entity_kind_from_str_aliases() {
        let cases = [
            ("doc", EntityKind::Document),
            ("paper", EntityKind::Document),
            ("benchmark", EntityKind::Dataset),
            ("repo", EntityKind::Project),
            ("author", EntityKind::Person),
            ("lab", EntityKind::Org),
            ("art", EntityKind::Artifact),
            ("svc", EntityKind::Service),
        ];
        for (alias, expected) in cases {
            assert_eq!(alias.parse::<EntityKind>().unwrap(), expected);
        }
    }

    #[test]
    fn entity_kind_artifact_and_service_roundtrip() {
        assert_eq!(EntityKind::Artifact.name(), "artifact");
        assert_eq!(EntityKind::Service.name(), "service");

        let artifact: EntityKind = "artifact".parse().unwrap();
        assert_eq!(artifact, EntityKind::Artifact);
        let service: EntityKind = "service".parse().unwrap();
        assert_eq!(service, EntityKind::Service);
    }

    #[test]
    fn entity_kind_all_has_eight_variants() {
        let all = EntityKind::ALL;
        assert_eq!(all.len(), 8);
        assert!(all.contains(&EntityKind::Artifact));
        assert!(all.contains(&EntityKind::Service));
    }

    #[test]
    fn entity_kind_unknown_valid_list_includes_new_kinds() {
        let err = "gadget".parse::<EntityKind>().unwrap_err();
        for expected in ["artifact", "service"] {
            assert!(err.valid.contains(&expected));
        }
    }

    #[test]
    fn entity_kind_from_str_case_insensitive() {
        let upper: EntityKind = "CONCEPT".parse().unwrap();
        assert_eq!(upper, EntityKind::Concept);
        let title: EntityKind = "Person".parse().unwrap();
        assert_eq!(title, EntityKind::Person);
    }

    #[test]
    fn entity_kind_from_str_unknown_errors() {
        let err = "gadget".parse::<EntityKind>().unwrap_err();
        assert_eq!(err.domain, "entity_kind");
        assert_eq!(err.value, "gadget");
        assert!(err.valid.contains(&"concept"));
    }

    #[test]
    fn link_construction() {
        let link = link_with_weight(1.0);
        assert_eq!(link.relation, EdgeRelation::Extends);
        assert!(link.is_valid());
    }

    #[test]
    fn link_is_valid_rejects_out_of_range() {
        let link = link_with_weight(2.0);
        assert!(!link.is_valid());
    }

    #[cfg(feature = "serde")]
    #[test]
    fn link_serde_rejects_weight_above_one() {
        let result: Result<Link, _> = serde_json::from_value(link_json(2.0));
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(
            err.contains("[0.0, 1.0]"),
            "error should mention range: {err}"
        );
    }

    #[cfg(feature = "serde")]
    #[test]
    fn link_serde_rejects_negative_weight() {
        let result: Result<Link, _> = serde_json::from_value(link_json(-0.1));
        assert!(result.is_err());
    }

    #[cfg(feature = "serde")]
    #[test]
    fn link_serde_accepts_valid_weight() {
        let link: Link =
            serde_json::from_value(link_json(0.75)).expect("valid weight should deserialize");
        assert_eq!(link.weight, 0.75);
    }
}