Skip to main content

llmtxt_core/
graph.rs

1//! Knowledge graph extraction from message metadata.
2//!
3//! Builds a graph of agents, topics, and decisions from @mentions, #tags,
4//! and /directives in message content.
5//!
6//! Ported from `packages/llmtxt/src/graph.ts`.
7
8#[cfg(feature = "wasm")]
9use wasm_bindgen::prelude::*;
10
11use serde::{Deserialize, Serialize};
12use std::cmp::Reverse;
13use std::collections::{HashMap, HashSet};
14
15// ── Types ──────────────────────────────────────────────────────────
16
/// A single vertex of the knowledge graph.
///
/// Serialized with camelCase keys; `node_type` is emitted as `type`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GraphNode {
    /// Node identifier. `build_graph_native` produces ids of the form
    /// `agent:<name>`, `topic:<tag>` and `decision:<message id>`.
    pub id: String,
    /// Node kind; `build_graph_native` writes `"agent"`, `"topic"` or `"decision"`.
    #[serde(rename = "type")]
    pub node_type: String,
    /// Display text: the agent name, the tag, or (for decisions) a preview of
    /// the message content.
    pub label: String,
    /// How many times the node was referenced while building the graph
    /// (1 on creation, +1 for every later reference).
    pub weight: u32,
}
26
/// A directed, weighted relation between two graph nodes.
///
/// Serialized with camelCase keys; `edge_type` is emitted as `type`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GraphEdge {
    /// Id of the node the edge starts from (an `agent:` node in graphs built
    /// by `build_graph_native`).
    pub source: String,
    /// Id of the node the edge points to.
    pub target: String,
    /// Relation kind; `build_graph_native` writes `"mentions"`, `"discusses"`
    /// or `"decides"`.
    #[serde(rename = "type")]
    pub edge_type: String,
    /// How many times this (source, target, type) relation was recorded.
    pub weight: u32,
}
36
/// Summary counts for a [`KnowledgeGraph`], as computed by `build_graph_native`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GraphStats {
    /// Number of nodes whose type is `"agent"`.
    pub agent_count: usize,
    /// Number of nodes whose type is `"topic"`.
    pub topic_count: usize,
    /// Number of nodes whose type is `"decision"`.
    pub decision_count: usize,
    /// Total number of edges in the graph.
    pub edge_count: usize,
}
45
/// The complete graph: its nodes, its edges and summary statistics.
///
/// This is the value returned by `build_graph_native` and the shape the
/// `top_*_wasm` entry points expect as JSON input.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct KnowledgeGraph {
    /// All nodes (agents, topics, decisions).
    pub nodes: Vec<GraphNode>,
    /// All edges between the nodes.
    pub edges: Vec<GraphEdge>,
    /// Node/edge counts for this graph.
    pub stats: GraphStats,
}
53
/// Optional, pre-extracted annotations attached to a message.
///
/// For each field that is `None`, `build_graph_native` falls back to
/// extracting the values from the message content instead.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct MessageMetadata {
    /// Mentioned agent names (without the `agent:` node prefix).
    pub mentions: Option<Vec<String>>,
    /// Directive keywords; a `"decision"` entry makes the message a decision node.
    pub directives: Option<Vec<String>>,
    /// Topic tags (without the `topic:` node prefix).
    pub tags: Option<Vec<String>>,
}
61
/// One message fed into graph construction.
///
/// Deserialized from camelCase JSON (`fromAgentId`, `createdAt`, ...).
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct MessageInput {
    /// Message id; used to form the `decision:<id>` node id.
    pub id: String,
    /// Name of the sending agent; becomes the `agent:<name>` source node.
    pub from_agent_id: String,
    /// Raw message text, scanned for @mentions, #tags and /directives when
    /// the corresponding metadata field is absent.
    pub content: String,
    /// Optional pre-extracted annotations that take precedence over scanning
    /// `content`.
    pub metadata: Option<MessageMetadata>,
    /// Creation timestamp as a string; not read by the graph functions in
    /// this file.
    pub created_at: String,
}
71
/// One row of the `top_topics_native` ranking.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TopTopic {
    /// Topic name, i.e. the edge target with its `topic:` prefix removed.
    pub topic: String,
    /// Number of distinct agents with a `discusses` edge to this topic.
    pub agents: usize,
}
78
/// One row of the `top_agents_native` ranking.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TopAgent {
    /// Agent name, i.e. the edge source with its `agent:` prefix removed.
    pub agent: String,
    /// Sum of the weights of all edges that originate from this agent.
    pub activity: u32,
}
85
86// ── Extraction ─────────────────────────────────────────────────────
87
/// Collect the `@name` mentions that occur in `content`.
///
/// A name is the run of alphanumeric characters, `_` and `-` that directly
/// follows an `@`. Empty names and the broadcast name `all` are skipped.
/// Each name is reported once, in order of first appearance.
pub fn extract_mentions(content: &str) -> Vec<String> {
    let is_name_char = |c: char| c.is_alphanumeric() || c == '_' || c == '-';
    let mut already_seen: HashSet<&str> = HashSet::new();

    // Every piece after the first one starts right behind an '@'; a name can
    // never contain '@', so its leading run of name characters is the mention.
    content
        .split('@')
        .skip(1)
        .map(|piece| {
            let end = piece
                .find(|c: char| !is_name_char(c))
                .unwrap_or(piece.len());
            &piece[..end]
        })
        .filter(|name| !name.is_empty() && *name != "all" && already_seen.insert(*name))
        .map(String::from)
        .collect()
}
116
/// Collect the `#tag` names that occur in `content`.
///
/// A tag is the run of alphanumeric characters, `_` and `-` that directly
/// follows a `#`. Empty tags are skipped. Each tag is reported once, in
/// order of first appearance.
pub fn extract_tags(content: &str) -> Vec<String> {
    let is_tag_char = |c: char| c.is_alphanumeric() || c == '_' || c == '-';
    let mut already_seen: HashSet<&str> = HashSet::new();

    // Every piece after the first one starts right behind a '#'; a tag can
    // never contain '#', so its leading run of tag characters is the tag.
    content
        .split('#')
        .skip(1)
        .map(|piece| {
            let end = piece
                .find(|c: char| !is_tag_char(c))
                .unwrap_or(piece.len());
            &piece[..end]
        })
        .filter(|tag| !tag.is_empty() && already_seen.insert(*tag))
        .map(String::from)
        .collect()
}
145
/// Directive keywords recognised by [`extract_directives`].
const DIRECTIVE_KEYWORDS: &[&str] = &[
    "action", "info", "review", "decision", "blocked", "claim", "done", "proposal",
];

/// Collect the `/directive` keywords that occur in `content`.
///
/// Only lines whose first non-whitespace character is `/` are considered.
/// The keyword is the run of alphabetic characters right after the slash
/// (it ends at the first non-alphabetic character, not only at whitespace)
/// and must be one of `DIRECTIVE_KEYWORDS`. Each keyword is reported once,
/// in order of first appearance.
pub fn extract_directives(content: &str) -> Vec<String> {
    let mut already_seen: HashSet<&str> = HashSet::new();

    content
        .lines()
        .filter_map(|line| line.trim_start().strip_prefix('/'))
        .map(|rest| {
            let end = rest
                .find(|c: char| !c.is_alphabetic())
                .unwrap_or(rest.len());
            &rest[..end]
        })
        .filter(|word| DIRECTIVE_KEYWORDS.contains(word) && already_seen.insert(*word))
        .map(String::from)
        .collect()
}
168
169// ── Graph Building ─────────────────────────────────────────────────
170
171/// Build a knowledge graph from an array of messages.
172///
173/// Nodes: agents (from fromAgentId + @mentions), topics (#tags), decisions (/decision messages).
174/// Edges: mentions (agent→agent), discusses (agent→topic), decides (agent→decision).
175pub fn build_graph_native(messages: &[MessageInput]) -> KnowledgeGraph {
176    let mut node_map: HashMap<String, GraphNode> = HashMap::new();
177    let mut edge_map: HashMap<String, GraphEdge> = HashMap::new();
178
179    let get_or_create =
180        |node_map: &mut HashMap<String, GraphNode>, id: &str, node_type: &str, label: &str| {
181            if let Some(node) = node_map.get_mut(id) {
182                node.weight += 1;
183            } else {
184                node_map.insert(
185                    id.to_string(),
186                    GraphNode {
187                        id: id.to_string(),
188                        node_type: node_type.to_string(),
189                        label: label.to_string(),
190                        weight: 1,
191                    },
192                );
193            }
194        };
195
196    let add_edge =
197        |edge_map: &mut HashMap<String, GraphEdge>, source: &str, target: &str, edge_type: &str| {
198            let key = format!("{}\u{2192}{}:{}", source, target, edge_type);
199            if let Some(edge) = edge_map.get_mut(&key) {
200                edge.weight += 1;
201            } else {
202                edge_map.insert(
203                    key,
204                    GraphEdge {
205                        source: source.to_string(),
206                        target: target.to_string(),
207                        edge_type: edge_type.to_string(),
208                        weight: 1,
209                    },
210                );
211            }
212        };
213
214    for msg in messages {
215        let agent_id = &msg.from_agent_id;
216        let mentions = msg
217            .metadata
218            .as_ref()
219            .and_then(|m| m.mentions.clone())
220            .unwrap_or_else(|| extract_mentions(&msg.content));
221        let tags = msg
222            .metadata
223            .as_ref()
224            .and_then(|m| m.tags.clone())
225            .unwrap_or_else(|| extract_tags(&msg.content));
226        let directives = msg
227            .metadata
228            .as_ref()
229            .and_then(|m| m.directives.clone())
230            .unwrap_or_else(|| extract_directives(&msg.content));
231
232        let agent_node_id = format!("agent:{agent_id}");
233        get_or_create(&mut node_map, &agent_node_id, "agent", agent_id);
234
235        for mentioned in &mentions {
236            let mentioned_id = format!("agent:{mentioned}");
237            get_or_create(&mut node_map, &mentioned_id, "agent", mentioned);
238            add_edge(&mut edge_map, &agent_node_id, &mentioned_id, "mentions");
239        }
240
241        for tag in &tags {
242            let topic_id = format!("topic:{tag}");
243            get_or_create(&mut node_map, &topic_id, "topic", tag);
244            add_edge(&mut edge_map, &agent_node_id, &topic_id, "discusses");
245        }
246
247        if directives.contains(&"decision".to_string()) {
248            let decision_id = format!("decision:{}", msg.id);
249            let preview: String = msg
250                .content
251                .chars()
252                .take(80)
253                .collect::<String>()
254                .replace('\n', " ");
255            get_or_create(&mut node_map, &decision_id, "decision", &preview);
256            add_edge(&mut edge_map, &agent_node_id, &decision_id, "decides");
257        }
258    }
259
260    let nodes: Vec<GraphNode> = node_map.into_values().collect();
261    let edges: Vec<GraphEdge> = edge_map.into_values().collect();
262
263    let stats = GraphStats {
264        agent_count: nodes.iter().filter(|n| n.node_type == "agent").count(),
265        topic_count: nodes.iter().filter(|n| n.node_type == "topic").count(),
266        decision_count: nodes.iter().filter(|n| n.node_type == "decision").count(),
267        edge_count: edges.len(),
268    };
269
270    KnowledgeGraph {
271        nodes,
272        edges,
273        stats,
274    }
275}
276
277/// Find the most connected topics in the graph.
278///
279/// Returns topics sorted by number of discussing agents, limited to `limit` entries.
280pub fn top_topics_native(graph: &KnowledgeGraph, limit: usize) -> Vec<TopTopic> {
281    let mut topic_agents: HashMap<String, HashSet<String>> = HashMap::new();
282
283    for edge in &graph.edges {
284        if edge.edge_type == "discusses" {
285            let topic = edge.target.trim_start_matches("topic:").to_string();
286            topic_agents
287                .entry(topic)
288                .or_default()
289                .insert(edge.source.trim_start_matches("agent:").to_string());
290        }
291    }
292
293    let mut result: Vec<TopTopic> = topic_agents
294        .into_iter()
295        .map(|(topic, agents)| TopTopic {
296            topic,
297            agents: agents.len(),
298        })
299        .collect();
300
301    result.sort_by_key(|t| Reverse(t.agents));
302    result.truncate(limit);
303    result
304}
305
306/// Find the most active agents in the graph.
307///
308/// Returns agents sorted by total edge weight, limited to `limit` entries.
309pub fn top_agents_native(graph: &KnowledgeGraph, limit: usize) -> Vec<TopAgent> {
310    let mut agent_activity: HashMap<String, u32> = HashMap::new();
311
312    for edge in &graph.edges {
313        if edge.source.starts_with("agent:") {
314            let agent = edge.source.trim_start_matches("agent:").to_string();
315            *agent_activity.entry(agent).or_default() += edge.weight;
316        }
317    }
318
319    let mut result: Vec<TopAgent> = agent_activity
320        .into_iter()
321        .map(|(agent, activity)| TopAgent { agent, activity })
322        .collect();
323
324    result.sort_by_key(|a| Reverse(a.activity));
325    result.truncate(limit);
326    result
327}
328
329// ── WASM entry points ──────────────────────────────────────────────
330
331/// Extract @mentions from content. Returns JSON array of strings.
332#[cfg_attr(feature = "wasm", wasm_bindgen)]
333pub fn extract_mentions_wasm(content: &str) -> String {
334    serde_json::to_string(&extract_mentions(content)).unwrap_or_else(|_| "[]".to_string())
335}
336
337/// Extract #tags from content. Returns JSON array of strings.
338#[cfg_attr(feature = "wasm", wasm_bindgen)]
339pub fn extract_tags_wasm(content: &str) -> String {
340    serde_json::to_string(&extract_tags(content)).unwrap_or_else(|_| "[]".to_string())
341}
342
343/// Extract /directives from content. Returns JSON array of strings.
344#[cfg_attr(feature = "wasm", wasm_bindgen)]
345pub fn extract_directives_wasm(content: &str) -> String {
346    serde_json::to_string(&extract_directives(content)).unwrap_or_else(|_| "[]".to_string())
347}
348
349/// Build a knowledge graph from a JSON array of MessageInput objects.
350///
351/// Returns a JSON-serialised KnowledgeGraph, or `{"error":"..."}` on failure.
352#[cfg_attr(feature = "wasm", wasm_bindgen)]
353pub fn build_graph_wasm(messages_json: &str) -> String {
354    let messages: Vec<MessageInput> = match serde_json::from_str(messages_json) {
355        Ok(m) => m,
356        Err(e) => return format!(r#"{{"error":"Invalid messages JSON: {e}"}}"#),
357    };
358    let graph = build_graph_native(&messages);
359    serde_json::to_string(&graph).unwrap_or_else(|e| format!(r#"{{"error":"Serialization: {e}"}}"#))
360}
361
362/// Find the most connected topics.
363///
364/// `graph_json` is a serialised KnowledgeGraph. `limit` is the max number of results.
365/// Returns a JSON array of `{ topic, agents }` objects.
366#[cfg_attr(feature = "wasm", wasm_bindgen)]
367pub fn top_topics_wasm(graph_json: &str, limit: u32) -> String {
368    let graph: KnowledgeGraph = match serde_json::from_str(graph_json) {
369        Ok(g) => g,
370        Err(e) => return format!(r#"{{"error":"Invalid graph JSON: {e}"}}"#),
371    };
372    let result = top_topics_native(&graph, limit as usize);
373    serde_json::to_string(&result)
374        .unwrap_or_else(|e| format!(r#"{{"error":"Serialization: {e}"}}"#))
375}
376
377/// Find the most active agents.
378///
379/// `graph_json` is a serialised KnowledgeGraph. `limit` is the max number of results.
380/// Returns a JSON array of `{ agent, activity }` objects.
381#[cfg_attr(feature = "wasm", wasm_bindgen)]
382pub fn top_agents_wasm(graph_json: &str, limit: u32) -> String {
383    let graph: KnowledgeGraph = match serde_json::from_str(graph_json) {
384        Ok(g) => g,
385        Err(e) => return format!(r#"{{"error":"Invalid graph JSON: {e}"}}"#),
386    };
387    let result = top_agents_native(&graph, limit as usize);
388    serde_json::to_string(&result)
389        .unwrap_or_else(|e| format!(r#"{{"error":"Serialization: {e}"}}"#))
390}
391
/// Unit tests for extraction, graph building, rankings and the JSON entry
/// points. Assertions pin exact values (order, weights, counts) wherever the
/// result is deterministic; graph node/edge order is never relied upon.
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a message without metadata so that everything is extracted
    /// from `content`.
    fn make_msg(id: &str, agent: &str, content: &str) -> MessageInput {
        MessageInput {
            id: id.to_string(),
            from_agent_id: agent.to_string(),
            content: content.to_string(),
            metadata: None,
            created_at: "2026-01-01T00:00:00Z".to_string(),
        }
    }

    /// Look a node up by id, panicking with a readable message if it is missing.
    fn node<'a>(graph: &'a KnowledgeGraph, id: &str) -> &'a GraphNode {
        graph
            .nodes
            .iter()
            .find(|n| n.id == id)
            .unwrap_or_else(|| panic!("node {id} not found"))
    }

    /// True when the graph contains the given edge.
    fn has_edge(graph: &KnowledgeGraph, source: &str, target: &str, edge_type: &str) -> bool {
        graph
            .edges
            .iter()
            .any(|e| e.source == source && e.target == target && e.edge_type == edge_type)
    }

    // ── extract_mentions ──────────────────────────────────────────

    #[test]
    fn mentions_basic() {
        let m = extract_mentions("hello @alice and @bob");
        assert_eq!(m, vec!["alice", "bob"]);
    }

    #[test]
    fn mentions_dedup() {
        let m = extract_mentions("@alice @alice @bob");
        assert_eq!(m, vec!["alice", "bob"]);
    }

    #[test]
    fn mentions_skip_at_all() {
        let m = extract_mentions("@all @bob");
        assert_eq!(m, vec!["bob"]);
    }

    #[test]
    fn mentions_empty() {
        let m = extract_mentions("no mentions here");
        assert!(m.is_empty());
    }

    // ── extract_tags ──────────────────────────────────────────────

    #[test]
    fn tags_basic() {
        let t = extract_tags("working on #rust and #wasm");
        assert_eq!(t, vec!["rust", "wasm"]);
    }

    #[test]
    fn tags_dedup() {
        let t = extract_tags("#rust #rust #wasm");
        assert_eq!(t, vec!["rust", "wasm"]);
    }

    // ── extract_directives ────────────────────────────────────────

    #[test]
    fn directives_basic() {
        let d = extract_directives("/decision\n/action\n/info");
        assert_eq!(d, vec!["decision", "action", "info"]);
    }

    #[test]
    fn directives_unknown_ignored() {
        let d = extract_directives("/unknown-directive");
        assert!(d.is_empty());
    }

    #[test]
    fn directives_dedup() {
        let d = extract_directives("/review\n/review");
        assert_eq!(d, vec!["review"]);
    }

    // ── build_graph_native ────────────────────────────────────────

    #[test]
    fn graph_builds_agent_nodes() {
        let msgs = vec![
            make_msg("1", "alice", "hello @bob"),
            make_msg("2", "bob", "hi @alice"),
        ];
        let graph = build_graph_native(&msgs);
        // Each agent is referenced twice: once as sender, once as mention.
        assert_eq!(node(&graph, "agent:alice").weight, 2);
        assert_eq!(node(&graph, "agent:bob").weight, 2);
        assert!(has_edge(&graph, "agent:alice", "agent:bob", "mentions"));
        assert!(has_edge(&graph, "agent:bob", "agent:alice", "mentions"));
    }

    #[test]
    fn graph_builds_topic_nodes() {
        let msgs = vec![make_msg("1", "alice", "working on #rust")];
        let graph = build_graph_native(&msgs);
        let topic = node(&graph, "topic:rust");
        assert_eq!(topic.node_type, "topic");
        assert_eq!(topic.label, "rust");
        assert!(has_edge(&graph, "agent:alice", "topic:rust", "discusses"));
    }

    #[test]
    fn graph_builds_decision_node() {
        let msgs = vec![make_msg("1", "alice", "/decision\nuse Rust")];
        let graph = build_graph_native(&msgs);
        let decision = node(&graph, "decision:1");
        assert_eq!(decision.node_type, "decision");
        // Newlines in the preview are flattened to spaces.
        assert_eq!(decision.label, "/decision use Rust");
        assert!(has_edge(&graph, "agent:alice", "decision:1", "decides"));
    }

    #[test]
    fn graph_prefers_metadata_over_content() {
        let msg = MessageInput {
            metadata: Some(MessageMetadata {
                mentions: Some(vec!["carol".to_string()]),
                directives: None,
                tags: None,
            }),
            ..make_msg("1", "alice", "@bob #rust")
        };
        let graph = build_graph_native(&[msg]);
        // Mentions come from metadata, tags fall back to the content.
        assert!(graph.nodes.iter().any(|n| n.id == "agent:carol"));
        assert!(!graph.nodes.iter().any(|n| n.id == "agent:bob"));
        assert!(graph.nodes.iter().any(|n| n.id == "topic:rust"));
    }

    #[test]
    fn graph_stats_correct() {
        let msgs = vec![
            make_msg("1", "alice", "@bob #topic"),
            make_msg("2", "bob", "#wasm"),
        ];
        let graph = build_graph_native(&msgs);
        assert_eq!(graph.stats.agent_count, 2);
        assert_eq!(graph.stats.topic_count, 2);
        assert_eq!(graph.stats.decision_count, 0);
        assert_eq!(graph.stats.edge_count, 3);
        assert_eq!(graph.edges.len(), 3);
    }

    // ── top_topics ────────────────────────────────────────────────

    #[test]
    fn top_topics_ranking() {
        let msgs = vec![
            make_msg("1", "alice", "#rust"),
            make_msg("2", "bob", "#rust #wasm"),
            make_msg("3", "charlie", "#rust"),
        ];
        let graph = build_graph_native(&msgs);
        let topics = top_topics_native(&graph, 10);
        // rust discussed by 3 agents, wasm by 1
        assert_eq!(topics.len(), 2);
        assert_eq!(topics[0].topic, "rust");
        assert_eq!(topics[0].agents, 3);
        assert_eq!(topics[1].topic, "wasm");
        assert_eq!(topics[1].agents, 1);
        assert_eq!(top_topics_native(&graph, 1).len(), 1);
    }

    // ── top_agents ────────────────────────────────────────────────

    #[test]
    fn top_agents_ranking() {
        let msgs = vec![
            make_msg("1", "alice", "#topic1 #topic2 @bob"),
            make_msg("2", "bob", "#topic1"),
        ];
        let graph = build_graph_native(&msgs);
        let agents = top_agents_native(&graph, 10);
        // alice: two topics + one mention; bob: one topic.
        assert_eq!(agents.len(), 2);
        assert_eq!(agents[0].agent, "alice");
        assert_eq!(agents[0].activity, 3);
        assert_eq!(agents[1].agent, "bob");
        assert_eq!(agents[1].activity, 1);
    }

    // ── WASM JSON roundtrip ───────────────────────────────────────

    #[test]
    fn wasm_extract_mentions_json() {
        let json = extract_mentions_wasm("hello @alice @bob");
        let parsed: Vec<String> = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, vec!["alice", "bob"]);
    }

    #[test]
    fn wasm_build_graph_json() {
        let msgs_json = r#"[{"id":"1","fromAgentId":"alice","content":"@bob #rust","createdAt":"2026-01-01T00:00:00Z"}]"#;
        let out = build_graph_wasm(msgs_json);
        let parsed: serde_json::Value = serde_json::from_str(&out).unwrap();
        assert!(parsed.get("error").is_none());
        assert!(parsed.get("nodes").is_some());
        assert_eq!(parsed["stats"]["agentCount"], 2);
        assert_eq!(parsed["stats"]["topicCount"], 1);
        assert_eq!(parsed["stats"]["edgeCount"], 2);
    }

    // Byte-identity vectors matching TypeScript graph.ts
    #[test]
    fn byte_identity_vec1_mentions() {
        // TS: extractMentions('@alice and @bob') => ['alice', 'bob']
        assert_eq!(extract_mentions("@alice and @bob"), vec!["alice", "bob"]);
    }

    #[test]
    fn byte_identity_vec2_tags() {
        // TS: extractTags('working on #rust') => ['rust']
        assert_eq!(extract_tags("working on #rust"), vec!["rust"]);
    }

    #[test]
    fn byte_identity_vec3_directives() {
        // TS: extractDirectives('/decision\n/action') => ['decision', 'action']
        let d = extract_directives("/decision\n/action");
        assert_eq!(d, vec!["decision", "action"]);
    }
}
573}