Skip to main content

khive_types/
entity.rs

1//! Entity substrate — graph nodes with typed properties and links.
2
3extern crate alloc;
4use alloc::collections::BTreeMap;
5use alloc::string::String;
6use alloc::vec::Vec;
7use core::fmt;
8use core::str::FromStr;
9
10use crate::{EdgeRelation, Header, Id128, Timestamp};
11
/// Classification taxonomy for entities in a research knowledge graph (ADR-001).
///
/// The set is deliberately limited to 6 kinds that agents can tell apart from
/// unambiguous signals. Finer-grained distinctions (algorithm vs technique,
/// model vs architecture) belong in `properties`: they enable no useful
/// queries with the 13-relation edge ontology and cause 20-30%
/// misclassification.
///
/// With the `serde` feature enabled, variants are (de)serialized under their
/// snake_case names.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Serialize, serde::Deserialize),
    serde(rename_all = "snake_case")
)]
pub enum EntityKind {
    /// Algorithms, techniques, architectures, theories, models, research gaps.
    ///
    /// This is the default and the residual bucket; put finer-grained typing
    /// in `properties.type`.
    #[default]
    Concept,
    /// Papers, preprints, technical reports, blog posts, books.
    ///
    /// Typically carries: title, authors, year, venue, DOI/URL.
    Document,
    /// Benchmarks, corpora, evaluation sets.
    ///
    /// Typically carries: task type, size, metrics, license.
    Dataset,
    /// Codebases, libraries, tools, frameworks.
    ///
    /// Typically carries: language, repo URL, license.
    Project,
    /// Researchers, engineers, authors.
    Person,
    /// Labs, companies, institutions.
    Org,
}
40
41impl EntityKind {
42    pub const ALL: [Self; 6] = [
43        Self::Concept,
44        Self::Document,
45        Self::Dataset,
46        Self::Project,
47        Self::Person,
48        Self::Org,
49    ];
50
51    pub const fn name(self) -> &'static str {
52        match self {
53            Self::Concept => "concept",
54            Self::Document => "document",
55            Self::Dataset => "dataset",
56            Self::Project => "project",
57            Self::Person => "person",
58            Self::Org => "org",
59        }
60    }
61}
62
63impl fmt::Display for EntityKind {
64    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65        f.write_str(self.name())
66    }
67}
68
69impl FromStr for EntityKind {
70    type Err = String;
71
72    fn from_str(s: &str) -> Result<Self, Self::Err> {
73        match s.trim().to_ascii_lowercase().as_str() {
74            "concept" => Ok(Self::Concept),
75            "document" | "doc" | "paper" => Ok(Self::Document),
76            "dataset" | "data" | "benchmark" => Ok(Self::Dataset),
77            "project" | "repo" | "crate" | "library" | "lib" => Ok(Self::Project),
78            "person" | "author" | "researcher" => Ok(Self::Person),
79            "org" | "organization" | "organisation" | "lab" | "company" => Ok(Self::Org),
80            other => Err(alloc::format!(
81                "unknown entity kind: {other:?}. Valid: concept | document | dataset | project | person | org"
82            )),
83        }
84    }
85}
86
87/// A graph node with a type, display name, and key-value properties.
88#[derive(Clone, Debug)]
89#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
90pub struct Entity {
91    #[cfg_attr(feature = "serde", serde(flatten))]
92    pub header: Header,
93    pub kind: EntityKind,
94    pub name: String,
95    pub description: Option<String>,
96    pub properties: BTreeMap<String, PropertyValue>,
97    pub tags: Vec<String>,
98    pub deleted_at: Option<Timestamp>,
99}
100
101/// A directed, typed edge between two entities (or cross-substrate nodes).
102#[derive(Clone, Debug)]
103#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
104pub struct Link {
105    pub id: Id128,
106    pub source: Id128,
107    pub target: Id128,
108    pub relation: EdgeRelation,
109    pub properties: BTreeMap<String, PropertyValue>,
110    pub weight: f64,
111}
112
/// A value stored in the property map of an entity, link, or note.
///
/// The type is recursive: arrays and nested objects are supported so that
/// free-form JSON properties can be represented (e.g. `entity_ids[]`,
/// `alternatives_considered[]` per ADR-019).
///
/// With the `serde` feature the enum is `untagged`: values are written
/// without a variant tag, and deserialization tries the variants in
/// declaration order — so the order of the variants below is significant.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Serialize, serde::Deserialize),
    serde(untagged)
)]
pub enum PropertyValue {
    /// Text value.
    String(String),
    /// Signed 64-bit integer.
    Integer(i64),
    /// 64-bit floating-point number.
    Float(f64),
    /// Boolean flag.
    Boolean(bool),
    /// Ordered list of nested values.
    Array(Vec<PropertyValue>),
    /// Nested key-value map, ordered by key.
    Object(BTreeMap<String, PropertyValue>),
    /// Explicit absence of a value.
    Null,
}
129
130impl fmt::Display for PropertyValue {
131    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
132        match self {
133            Self::String(s) => f.write_str(s),
134            Self::Integer(n) => write!(f, "{n}"),
135            Self::Float(n) => write!(f, "{n}"),
136            Self::Boolean(b) => write!(f, "{b}"),
137            Self::Array(arr) => write!(f, "[{} items]", arr.len()),
138            Self::Object(obj) => write!(f, "{{{} keys}}", obj.len()),
139            Self::Null => f.write_str("null"),
140        }
141    }
142}
143
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Namespace, Timestamp};

    /// An entity built field by field keeps its kind and its properties.
    #[test]
    fn entity_with_properties() {
        let properties = BTreeMap::from([
            (
                String::from("role"),
                PropertyValue::String(String::from("engineer")),
            ),
            (String::from("age"), PropertyValue::Integer(30)),
        ]);

        let header = Header::new(
            Id128::from_u128(1),
            Namespace::default(),
            Timestamp::from_secs(1_700_000_000),
        );
        let person = Entity {
            header,
            kind: EntityKind::Person,
            name: String::from("Ocean"),
            description: None,
            properties,
            tags: Vec::new(),
            deleted_at: None,
        };

        assert_eq!(person.kind, EntityKind::Person);
        assert_eq!(person.kind.name(), "person");
        assert_eq!(person.properties.len(), 2);
    }

    /// `Concept` is the default (residual) kind.
    #[test]
    fn entity_kind_default_is_concept() {
        assert_eq!(EntityKind::default(), EntityKind::Concept);
    }

    /// Whatever `Display` writes must parse back to the same kind.
    #[test]
    fn entity_kind_display_roundtrip() {
        for kind in EntityKind::ALL {
            let rendered = alloc::format!("{kind}");
            assert_eq!(rendered.parse::<EntityKind>().unwrap(), kind);
        }
    }

    /// One representative alias per non-default kind.
    #[test]
    fn entity_kind_from_str_aliases() {
        let cases = [
            ("doc", EntityKind::Document),
            ("paper", EntityKind::Document),
            ("benchmark", EntityKind::Dataset),
            ("repo", EntityKind::Project),
            ("author", EntityKind::Person),
            ("lab", EntityKind::Org),
        ];
        for (alias, expected) in cases {
            assert_eq!(alias.parse::<EntityKind>().unwrap(), expected);
        }
    }

    /// Parsing ignores ASCII case.
    #[test]
    fn entity_kind_from_str_case_insensitive() {
        let cases = [
            ("CONCEPT", EntityKind::Concept),
            ("Person", EntityKind::Person),
        ];
        for (input, expected) in cases {
            assert_eq!(input.parse::<EntityKind>().unwrap(), expected);
        }
    }

    /// Unrecognised input yields a descriptive error message.
    #[test]
    fn entity_kind_from_str_unknown_errors() {
        let message = "gadget".parse::<EntityKind>().unwrap_err();
        assert!(message.contains("unknown entity kind"));
    }

    /// A link can be built field by field and keeps its relation.
    #[test]
    fn link_construction() {
        let edge = Link {
            id: Id128::from_u128(100),
            source: Id128::from_u128(1),
            target: Id128::from_u128(2),
            relation: EdgeRelation::Extends,
            properties: BTreeMap::new(),
            weight: 1.0,
        };
        assert_eq!(edge.relation, EdgeRelation::Extends);
    }
}