Skip to main content

sqlite_graphrag/commands/
link.rs

1//! Handler for the `link` CLI subcommand.
2
3use crate::constants::DEFAULT_RELATION_WEIGHT;
4use crate::entity_type::EntityType;
5use crate::errors::AppError;
6use crate::i18n::{errors_msg, validation};
7use crate::output::{self, OutputFormat};
8use crate::paths::AppPaths;
9use crate::storage::connection::open_rw;
10use crate::storage::entities;
11use crate::storage::entities::NewEntity;
12use rusqlite::params;
13use serde::Serialize;
14
15#[derive(clap::Args)]
16#[command(after_long_help = "EXAMPLES:\n  \
17    # Link two existing graph entities (extracted by GLiNER NER during `remember`)\n  \
18    sqlite-graphrag link --from oauth-flow --to refresh-tokens --relation related\n\n  \
19    # Auto-create entities that don't exist yet\n  \
20    sqlite-graphrag link --from concept-a --to concept-b --relation depends-on --create-missing\n\n  \
21    # Specify entity type for auto-created entities\n  \
22    sqlite-graphrag link --from alice --to acme-corp --relation related --create-missing --entity-type person\n\n  \
23    # Use a custom (non-canonical) relation type\n  \
24    sqlite-graphrag link --from module-a --to module-b --relation implements --create-missing\n\n  \
25    # If the entity does not exist and --create-missing is not set, the command fails with exit 4.\n  \
26    # To list current entity names:\n  \
27    sqlite-graphrag graph entities | jaq '.entities[].name'\n\n  \
28NOTE:\n  \
29LOCK WAITING:\n  \
30    The root-level --wait-lock SECONDS flag (default 30s) controls how long\n  \
31    the link/unlink subcommands wait for the global CLI lock before failing\n  \
32    with exit 15. In a cold start (first call in a new namespace), the lock\n  \
33    acquisition may exceed the default wait. CI pipelines should pass\n  \
34    --wait-lock 60 for headroom. The link command emits a tracing::info!\n  \
35    diagnostic when the wait exceeds 5 seconds so operators can correlate\n  \
36    cold-start latency with this CLI invocation.\n\n  \
37    --from and --to expect ENTITY names (graph nodes), not memory names.\n  \
38    Memory names are managed via remember/read/edit/forget; entities are auto-extracted\n  \
39    by GLiNER NER from memory bodies or auto-created via --create-missing.")]
40pub struct LinkArgs {
41    /// Source ENTITY name (graph node, not memory). Entities are extracted by GLiNER NER during
42    /// `remember` or auto-created via `--create-missing`. Use `graph entities` to list
43    /// available entity names. Also accepts the alias `--name`.
44    #[arg(long, alias = "name")]
45    pub from: String,
46    /// Target ENTITY name (graph node, not memory). See `--from` for sourcing entity names.
47    #[arg(long)]
48    pub to: String,
49    /// Relation type between entities. Canonical values: applies-to, uses,
50    /// depends-on, causes, fixes, contradicts, supports, follows, related,
51    /// mentions, replaces, tracked-in. Any kebab-case or snake_case string
52    /// is also accepted as a custom relation.
53    #[arg(long, value_parser = crate::parsers::parse_relation, value_name = "RELATION")]
54    pub relation: String,
55    #[arg(long)]
56    pub weight: Option<f64>,
57    #[arg(long)]
58    pub namespace: Option<String>,
59    #[arg(long, value_enum, default_value = "json")]
60    pub format: OutputFormat,
61    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
62    pub json: bool,
63    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
64    pub db: Option<String>,
65    /// Auto-create entities when they do not exist. Created entities default to
66    /// type `concept` unless `--entity-type` specifies a different type.
67    #[arg(long, default_value_t = false)]
68    pub create_missing: bool,
69    /// Entity type assigned to auto-created entities (only effective with `--create-missing`).
70    #[arg(long, value_enum, default_value = "concept")]
71    pub entity_type: EntityType,
72    /// Reject non-canonical relation types with exit 1.
73    ///
74    /// When set, any relation not in the canonical list causes an immediate error.
75    /// Canonical values: applies-to, uses, depends-on, causes, fixes, contradicts,
76    /// supports, follows, related, mentions, replaces, tracked-in.
77    #[arg(
78        long,
79        default_value_t = false,
80        help = "Reject non-canonical relation types with exit 1"
81    )]
82    pub strict_relations: bool,
83    /// Emit a warning (but do not reject) when creating an edge would push either endpoint
84    /// entity above this degree. Default 50. Set 0 to disable the check.
85    #[arg(long, default_value_t = 50, value_name = "N")]
86    pub max_entity_degree: u32,
87}
88
/// Payload emitted by the `link` handler once the relationship has been
/// committed. Field order is the serialisation order, so keep it stable.
#[derive(Serialize)]
struct LinkResponse {
    /// Outcome of the call: `"created"` for a new relationship,
    /// `"already_exists"` when a matching one was found.
    action: String,
    /// Normalised name of the source entity.
    from: String,
    /// Normalised name of the target entity.
    to: String,
    /// Relation type exactly as supplied on the command line.
    relation: String,
    /// Weight read back from the stored relationship row (presumably the
    /// pre-existing weight when `action` is `"already_exists"` — confirm
    /// against `create_or_fetch_relationship`).
    weight: f64,
    /// Namespace the relationship was written to.
    namespace: String,
    /// Total execution time in milliseconds from handler start to serialisation.
    elapsed_ms: u64,
    /// Entity names that were auto-created by `--create-missing`.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    created_entities: Vec<String>,
    /// Non-fatal warnings (e.g. non-canonical relation type).
    #[serde(skip_serializing_if = "Vec::is_empty")]
    warnings: Vec<String>,
}
106
107pub fn run(args: LinkArgs) -> Result<(), AppError> {
108    let inicio = std::time::Instant::now();
109    tracing::debug!(target: "link", from = %args.from, to = %args.to, relation = %args.relation, "creating relationship");
110    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
111    let paths = AppPaths::resolve(args.db.as_deref())?;
112
113    // BUG-13 (v1.0.88): validate the ORIGINAL entity names BEFORE
114    // normalization. Normalizing "RUST" to "rust" would silently bypass
115    // the ALL_CAPS short-name guard (>=2 chars, no newlines, no ALL_CAPS
116    // <=4 chars). The test `link_rejects_four_char_all_caps_v1088`
117    // guards against the bypass.
118    if let Err(msg) = crate::storage::entities::validate_entity_name(&args.from) {
119        return Err(AppError::Validation(msg.to_string()));
120    }
121    if let Err(msg) = crate::storage::entities::validate_entity_name(&args.to) {
122        return Err(AppError::Validation(msg.to_string()));
123    }
124
125    let norm_from = crate::parsers::normalize_entity_name(&args.from);
126    let norm_to = crate::parsers::normalize_entity_name(&args.to);
127
128    if norm_from == norm_to {
129        return Err(AppError::Validation(validation::self_referential_link()));
130    }
131
132    let weight = args.weight.unwrap_or(DEFAULT_RELATION_WEIGHT);
133    if !(0.0..=1.0).contains(&weight) {
134        return Err(AppError::Validation(validation::invalid_link_weight(
135            weight,
136        )));
137    }
138    if weight >= 0.95 {
139        tracing::warn!(target: "link",
140            weight = weight,
141            "weight >= 0.95 compresses the scoring range; consider using a value below 0.95"
142        );
143    }
144    if weight <= 0.05 {
145        tracing::warn!(target: "link",
146            weight = weight,
147            "weight <= 0.05 may be too weak to influence traversal; consider using a value above 0.05"
148        );
149    }
150
151    crate::storage::connection::ensure_db_ready(&paths)?;
152
153    let mut warnings: Vec<String> = Vec::with_capacity(2);
154    let is_canonical = crate::parsers::is_canonical_relation(&args.relation);
155    if !is_canonical {
156        if args.strict_relations {
157            return Err(AppError::Validation(format!(
158                "non-canonical relation '{}': use --strict-relations=false or choose from: {}",
159                args.relation,
160                crate::parsers::CANONICAL_RELATIONS.join(", ")
161            )));
162        }
163        warnings.push(format!("non-canonical relation '{}'", args.relation));
164        tracing::warn!(target: "link",
165            relation = %args.relation,
166            "non-canonical relation accepted; consider using a well-known value"
167        );
168    }
169    let relation_str = &args.relation;
170
171    let mut conn = open_rw(&paths.db)?;
172    let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
173
174    let mut created_entities: Vec<String> = Vec::with_capacity(2);
175
176    if args.entity_type.as_str() == "memory" {
177        tracing::warn!(target: "link",
178            entity_type = "memory",
179            "entity_type 'memory' may conflict with memory table semantics; consider using 'concept' or another type"
180        );
181    }
182
183    let source_id = match entities::find_entity_id(&tx, &namespace, &norm_from)? {
184        Some(id) => id,
185        None if args.create_missing => {
186            let new_entity = NewEntity {
187                name: norm_from.clone(),
188                entity_type: args.entity_type,
189                description: None,
190            };
191            created_entities.push(norm_from.clone());
192            entities::upsert_entity(&tx, &namespace, &new_entity)?
193        }
194        None => {
195            return Err(AppError::NotFound(errors_msg::entity_not_found(
196                &norm_from, &namespace,
197            )));
198        }
199    };
200
201    let target_id = match entities::find_entity_id(&tx, &namespace, &norm_to)? {
202        Some(id) => id,
203        None if args.create_missing => {
204            let new_entity = NewEntity {
205                name: norm_to.clone(),
206                entity_type: args.entity_type,
207                description: None,
208            };
209            created_entities.push(norm_to.clone());
210            entities::upsert_entity(&tx, &namespace, &new_entity)?
211        }
212        None => {
213            return Err(AppError::NotFound(errors_msg::entity_not_found(
214                &norm_to, &namespace,
215            )));
216        }
217    };
218
219    let (rel_id, was_created) = entities::create_or_fetch_relationship(
220        &tx,
221        &namespace,
222        source_id,
223        target_id,
224        relation_str,
225        weight,
226        None,
227    )?;
228
229    let actual_weight: f64 = tx.query_row(
230        "SELECT weight FROM relationships WHERE id = ?1",
231        params![rel_id],
232        |r| r.get(0),
233    )?;
234
235    if was_created {
236        entities::recalculate_degree(&tx, source_id)?;
237        entities::recalculate_degree(&tx, target_id)?;
238
239        if args.max_entity_degree > 0 {
240            let cap = args.max_entity_degree as i64;
241            for (entity_id, entity_name) in [(source_id, &norm_from), (target_id, &norm_to)] {
242                let degree: i64 = tx.query_row(
243                    "SELECT degree FROM entities WHERE id = ?1",
244                    params![entity_id],
245                    |r| r.get(0),
246                )?;
247                if degree > cap {
248                    output::emit_progress(&format!(
249                        "WARNING: entity '{entity_name}' degree {degree} exceeds cap {cap}"
250                    ));
251                }
252            }
253        }
254    }
255    tx.commit()?;
256
257    conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
258
259    let action = if was_created {
260        "created".to_string()
261    } else {
262        "already_exists".to_string()
263    };
264
265    let response = LinkResponse {
266        action: action.clone(),
267        from: norm_from.clone(),
268        to: norm_to.clone(),
269        relation: relation_str.to_string(),
270        weight: actual_weight,
271        namespace: namespace.clone(),
272        elapsed_ms: inicio.elapsed().as_millis() as u64,
273        created_entities,
274        warnings,
275    };
276
277    match args.format {
278        OutputFormat::Json => output::emit_json(&response)?,
279        OutputFormat::Text | OutputFormat::Markdown => {
280            output::emit_text(&format!(
281                "{}: {} --[{}]--> {} [{}]",
282                action, response.from, response.relation, response.to, response.namespace
283            ));
284        }
285    }
286
287    Ok(())
288}
289
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal response shared by the serialisation tests; each test overrides
    /// only the fields it cares about via struct-update syntax.
    fn baseline() -> LinkResponse {
        LinkResponse {
            action: "created".to_string(),
            from: "a".to_string(),
            to: "b".to_string(),
            relation: "uses".to_string(),
            weight: 0.5,
            namespace: "global".to_string(),
            elapsed_ms: 0,
            created_entities: Vec::new(),
            warnings: Vec::new(),
        }
    }

    #[test]
    fn link_response_without_redundant_aliases() {
        // P1-O: source/target fields were removed from the JSON response.
        let resp = LinkResponse {
            from: "entity-a".to_string(),
            to: "entity-b".to_string(),
            weight: 1.0,
            namespace: "default".to_string(),
            ..baseline()
        };
        let json = serde_json::to_value(&resp).expect("serialization must work");
        assert_eq!(json["from"], "entity-a");
        assert_eq!(json["to"], "entity-b");
        assert!(
            json.get("source").is_none(),
            "field 'source' was removed in P1-O"
        );
        assert!(
            json.get("target").is_none(),
            "field 'target' was removed in P1-O"
        );
    }

    #[test]
    fn link_response_serializes_all_fields() {
        let resp = LinkResponse {
            action: "already_exists".to_string(),
            from: "origin".to_string(),
            to: "destination".to_string(),
            relation: "mentions".to_string(),
            weight: 0.8,
            namespace: "test".to_string(),
            elapsed_ms: 5,
            ..baseline()
        };
        let json = serde_json::to_value(&resp).expect("serialization must work");
        // Every always-present field must appear in the output.
        for key in [
            "action",
            "from",
            "to",
            "relation",
            "weight",
            "namespace",
            "elapsed_ms",
        ] {
            assert!(json.get(key).is_some());
        }
    }

    #[test]
    fn link_response_omits_created_entities_when_empty() {
        let resp = LinkResponse {
            weight: 1.0,
            ..baseline()
        };
        let json = serde_json::to_value(&resp).expect("serialization");
        assert!(
            json.get("created_entities").is_none(),
            "empty vec must be omitted"
        );
    }

    #[test]
    fn link_response_includes_created_entities_when_present() {
        let resp = LinkResponse {
            from: "new-a".to_string(),
            to: "new-b".to_string(),
            relation: "depends-on".to_string(),
            namespace: "test".to_string(),
            elapsed_ms: 1,
            created_entities: vec!["new-a".to_string(), "new-b".to_string()],
            ..baseline()
        };
        let json = serde_json::to_value(&resp).expect("serialization");
        let created = json["created_entities"].as_array().expect("must be array");
        assert_eq!(created.len(), 2);
        assert_eq!(created[0], "new-a");
        assert_eq!(created[1], "new-b");
    }

    #[test]
    fn link_response_includes_warnings_when_non_canonical() {
        let resp = LinkResponse {
            relation: "implements".to_string(),
            warnings: vec!["non-canonical relation 'implements'".to_string()],
            ..baseline()
        };
        let json = serde_json::to_value(&resp).expect("serialization");
        let w = json["warnings"]
            .as_array()
            .expect("warnings must be present");
        assert_eq!(w.len(), 1);
        assert!(w[0].as_str().unwrap().contains("implements"));
    }

    #[test]
    fn link_response_omits_warnings_when_empty() {
        let resp = baseline();
        let json = serde_json::to_value(&resp).expect("serialization");
        assert!(
            json.get("warnings").is_none(),
            "empty warnings must be omitted"
        );
    }
}