Skip to main content

sqlite_graphrag/commands/
link.rs

1//! Handler for the `link` CLI subcommand.
2
3use crate::constants::DEFAULT_RELATION_WEIGHT;
4use crate::entity_type::EntityType;
5use crate::errors::AppError;
6use crate::i18n::{errors_msg, validation};
7use crate::output::{self, OutputFormat};
8use crate::paths::AppPaths;
9use crate::storage::connection::open_rw;
10use crate::storage::entities;
11use crate::storage::entities::NewEntity;
12use rusqlite::params;
13use serde::Serialize;
14
15#[derive(clap::Args)]
16#[command(after_long_help = "EXAMPLES:\n  \
17    # Link two existing graph entities (extracted by GLiNER NER during `remember`)\n  \
18    sqlite-graphrag link --from oauth-flow --to refresh-tokens --relation related\n\n  \
19    # Auto-create entities that don't exist yet\n  \
20    sqlite-graphrag link --from concept-a --to concept-b --relation depends-on --create-missing\n\n  \
21    # Specify entity type for auto-created entities\n  \
22    sqlite-graphrag link --from alice --to acme-corp --relation related --create-missing --entity-type person\n\n  \
23    # Use a custom (non-canonical) relation type\n  \
24    sqlite-graphrag link --from module-a --to module-b --relation implements --create-missing\n\n  \
25    # If the entity does not exist and --create-missing is not set, the command fails with exit 4.\n  \
26    # To list current entity names:\n  \
27    sqlite-graphrag graph entities | jaq '.entities[].name'\n\n  \
28NOTE:\n  \
29    --from and --to expect ENTITY names (graph nodes), not memory names.\n  \
30    Memory names are managed via remember/read/edit/forget; entities are auto-extracted\n  \
31    by GLiNER NER from memory bodies or auto-created via --create-missing.")]
32pub struct LinkArgs {
33    /// Source ENTITY name (graph node, not memory). Entities are extracted by GLiNER NER during
34    /// `remember` or auto-created via `--create-missing`. Use `graph entities` to list
35    /// available entity names. Also accepts the alias `--name`.
36    #[arg(long, alias = "name")]
37    pub from: String,
38    /// Target ENTITY name (graph node, not memory). See `--from` for sourcing entity names.
39    #[arg(long)]
40    pub to: String,
41    /// Relation type between entities. Canonical values: applies-to, uses,
42    /// depends-on, causes, fixes, contradicts, supports, follows, related,
43    /// mentions, replaces, tracked-in. Any kebab-case or snake_case string
44    /// is also accepted as a custom relation.
45    #[arg(long, value_parser = crate::parsers::parse_relation, value_name = "RELATION")]
46    pub relation: String,
47    #[arg(long)]
48    pub weight: Option<f64>,
49    #[arg(long)]
50    pub namespace: Option<String>,
51    #[arg(long, value_enum, default_value = "json")]
52    pub format: OutputFormat,
53    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
54    pub json: bool,
55    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
56    pub db: Option<String>,
57    /// Auto-create entities when they do not exist. Created entities default to
58    /// type `concept` unless `--entity-type` specifies a different type.
59    #[arg(long, default_value_t = false)]
60    pub create_missing: bool,
61    /// Entity type assigned to auto-created entities (only effective with `--create-missing`).
62    #[arg(long, value_enum, default_value = "concept")]
63    pub entity_type: EntityType,
64    /// Reject non-canonical relation types with exit 1.
65    ///
66    /// When set, any relation not in the canonical list causes an immediate error.
67    /// Canonical values: applies-to, uses, depends-on, causes, fixes, contradicts,
68    /// supports, follows, related, mentions, replaces, tracked-in.
69    #[arg(
70        long,
71        default_value_t = false,
72        help = "Reject non-canonical relation types with exit 1"
73    )]
74    pub strict_relations: bool,
75    /// Emit a warning (but do not reject) when creating an edge would push either endpoint
76    /// entity above this degree. Default 50. Set 0 to disable the check.
77    #[arg(long, default_value_t = 50, value_name = "N")]
78    pub max_entity_degree: u32,
79}
80
/// Result of a `link` invocation, serialised to JSON by the handler.
///
/// Fields are emitted in declaration order by the derived `Serialize` impl, so reordering
/// them changes the key order of the JSON output.
#[derive(Serialize)]
struct LinkResponse {
    /// `"created"` when a new relationship was inserted, `"already_exists"` when a matching
    /// relationship was found instead.
    action: String,
    /// Normalised name of the source entity.
    from: String,
    /// Normalised name of the target entity.
    to: String,
    /// Relation type as supplied on the command line.
    relation: String,
    /// Weight read back from the stored relationship row (not necessarily the requested one
    /// when the relationship already existed).
    weight: f64,
    /// Namespace the link was resolved in.
    namespace: String,
    /// Total execution time in milliseconds from handler start to serialisation.
    elapsed_ms: u64,
    /// Entity names that were auto-created by `--create-missing`.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    created_entities: Vec<String>,
    /// Non-fatal warnings (e.g. non-canonical relation type).
    #[serde(skip_serializing_if = "Vec::is_empty")]
    warnings: Vec<String>,
}
98
99pub fn run(args: LinkArgs) -> Result<(), AppError> {
100    let inicio = std::time::Instant::now();
101    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
102    let paths = AppPaths::resolve(args.db.as_deref())?;
103
104    let norm_from = crate::parsers::normalize_entity_name(&args.from);
105    let norm_to = crate::parsers::normalize_entity_name(&args.to);
106
107    if norm_from == norm_to {
108        return Err(AppError::Validation(validation::self_referential_link()));
109    }
110
111    let weight = args.weight.unwrap_or(DEFAULT_RELATION_WEIGHT);
112    if !(0.0..=1.0).contains(&weight) {
113        return Err(AppError::Validation(validation::invalid_link_weight(
114            weight,
115        )));
116    }
117    if weight >= 0.95 {
118        tracing::warn!(
119            weight = weight,
120            "weight >= 0.95 compresses the scoring range; consider using a value below 0.95"
121        );
122    }
123    if weight <= 0.05 {
124        tracing::warn!(
125            weight = weight,
126            "weight <= 0.05 may be too weak to influence traversal; consider using a value above 0.05"
127        );
128    }
129
130    crate::storage::connection::ensure_db_ready(&paths)?;
131
132    let mut warnings: Vec<String> = Vec::new();
133    let is_canonical = crate::parsers::is_canonical_relation(&args.relation);
134    if !is_canonical {
135        if args.strict_relations {
136            return Err(AppError::Validation(format!(
137                "non-canonical relation '{}': use --strict-relations=false or choose from: {}",
138                args.relation,
139                crate::parsers::CANONICAL_RELATIONS.join(", ")
140            )));
141        }
142        warnings.push(format!("non-canonical relation '{}'", args.relation));
143        tracing::warn!(
144            relation = %args.relation,
145            "non-canonical relation accepted; consider using a well-known value"
146        );
147    }
148    let relation_str = &args.relation;
149
150    let mut conn = open_rw(&paths.db)?;
151    let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
152
153    let mut created_entities: Vec<String> = Vec::with_capacity(2);
154
155    if args.entity_type.as_str() == "memory" {
156        tracing::warn!(
157            entity_type = "memory",
158            "entity_type 'memory' may conflict with memory table semantics; consider using 'concept' or another type"
159        );
160    }
161
162    let source_id = match entities::find_entity_id(&tx, &namespace, &norm_from)? {
163        Some(id) => id,
164        None if args.create_missing => {
165            let new_entity = NewEntity {
166                name: norm_from.clone(),
167                entity_type: args.entity_type,
168                description: None,
169            };
170            created_entities.push(norm_from.clone());
171            entities::upsert_entity(&tx, &namespace, &new_entity)?
172        }
173        None => {
174            return Err(AppError::NotFound(errors_msg::entity_not_found(
175                &norm_from, &namespace,
176            )));
177        }
178    };
179
180    let target_id = match entities::find_entity_id(&tx, &namespace, &norm_to)? {
181        Some(id) => id,
182        None if args.create_missing => {
183            let new_entity = NewEntity {
184                name: norm_to.clone(),
185                entity_type: args.entity_type,
186                description: None,
187            };
188            created_entities.push(norm_to.clone());
189            entities::upsert_entity(&tx, &namespace, &new_entity)?
190        }
191        None => {
192            return Err(AppError::NotFound(errors_msg::entity_not_found(
193                &norm_to, &namespace,
194            )));
195        }
196    };
197
198    let (rel_id, was_created) = entities::create_or_fetch_relationship(
199        &tx,
200        &namespace,
201        source_id,
202        target_id,
203        relation_str,
204        weight,
205        None,
206    )?;
207
208    let actual_weight: f64 = tx.query_row(
209        "SELECT weight FROM relationships WHERE id = ?1",
210        params![rel_id],
211        |r| r.get(0),
212    )?;
213
214    if was_created {
215        entities::recalculate_degree(&tx, source_id)?;
216        entities::recalculate_degree(&tx, target_id)?;
217
218        if args.max_entity_degree > 0 {
219            let cap = args.max_entity_degree as i64;
220            for (entity_id, entity_name) in [(source_id, &norm_from), (target_id, &norm_to)] {
221                let degree: i64 = tx.query_row(
222                    "SELECT degree FROM entities WHERE id = ?1",
223                    params![entity_id],
224                    |r| r.get(0),
225                )?;
226                if degree > cap {
227                    output::emit_progress(&format!(
228                        "WARNING: entity '{entity_name}' degree {degree} exceeds cap {cap}"
229                    ));
230                }
231            }
232        }
233    }
234    tx.commit()?;
235
236    conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
237
238    let action = if was_created {
239        "created".to_string()
240    } else {
241        "already_exists".to_string()
242    };
243
244    let response = LinkResponse {
245        action: action.clone(),
246        from: norm_from.clone(),
247        to: norm_to.clone(),
248        relation: relation_str.to_string(),
249        weight: actual_weight,
250        namespace: namespace.clone(),
251        elapsed_ms: inicio.elapsed().as_millis() as u64,
252        created_entities,
253        warnings,
254    };
255
256    match args.format {
257        OutputFormat::Json => output::emit_json(&response)?,
258        OutputFormat::Text | OutputFormat::Markdown => {
259            output::emit_text(&format!(
260                "{}: {} --[{}]--> {} [{}]",
261                action, response.from, response.relation, response.to, response.namespace
262            ));
263        }
264    }
265
266    Ok(())
267}
268
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline response shared by the tests; each test overrides only the fields it cares
    /// about through struct-update syntax.
    fn baseline() -> LinkResponse {
        LinkResponse {
            action: "created".to_string(),
            from: "a".to_string(),
            to: "b".to_string(),
            relation: "uses".to_string(),
            weight: 0.5,
            namespace: "global".to_string(),
            elapsed_ms: 0,
            created_entities: Vec::new(),
            warnings: Vec::new(),
        }
    }

    #[test]
    fn link_response_without_redundant_aliases() {
        // P1-O: the JSON response no longer carries the source/target aliases.
        let resp = LinkResponse {
            from: "entity-a".to_string(),
            to: "entity-b".to_string(),
            weight: 1.0,
            namespace: "default".to_string(),
            ..baseline()
        };
        let json = serde_json::to_value(&resp).expect("serialization must work");
        assert_eq!(json["from"], "entity-a");
        assert_eq!(json["to"], "entity-b");
        assert!(
            json.get("source").is_none(),
            "field 'source' was removed in P1-O"
        );
        assert!(
            json.get("target").is_none(),
            "field 'target' was removed in P1-O"
        );
    }

    #[test]
    fn link_response_serializes_all_fields() {
        let resp = LinkResponse {
            action: "already_exists".to_string(),
            from: "origin".to_string(),
            to: "destination".to_string(),
            relation: "mentions".to_string(),
            weight: 0.8,
            namespace: "test".to_string(),
            elapsed_ms: 5,
            ..baseline()
        };
        let json = serde_json::to_value(&resp).expect("serialization must work");
        // Every always-present key must show up in the serialised object.
        for key in [
            "action",
            "from",
            "to",
            "relation",
            "weight",
            "namespace",
            "elapsed_ms",
        ] {
            assert!(json.get(key).is_some());
        }
    }

    #[test]
    fn link_response_omits_created_entities_when_empty() {
        let resp = LinkResponse {
            weight: 1.0,
            ..baseline()
        };
        let json = serde_json::to_value(&resp).expect("serialization");
        assert!(
            json.get("created_entities").is_none(),
            "empty vec must be omitted"
        );
    }

    #[test]
    fn link_response_includes_created_entities_when_present() {
        let resp = LinkResponse {
            from: "new-a".to_string(),
            to: "new-b".to_string(),
            relation: "depends-on".to_string(),
            namespace: "test".to_string(),
            elapsed_ms: 1,
            created_entities: vec!["new-a".to_string(), "new-b".to_string()],
            ..baseline()
        };
        let json = serde_json::to_value(&resp).expect("serialization");
        let created = json["created_entities"].as_array().expect("must be array");
        assert_eq!(created.len(), 2);
        assert_eq!(created[0], "new-a");
        assert_eq!(created[1], "new-b");
    }

    #[test]
    fn link_response_includes_warnings_when_non_canonical() {
        let resp = LinkResponse {
            relation: "implements".to_string(),
            warnings: vec!["non-canonical relation 'implements'".to_string()],
            ..baseline()
        };
        let json = serde_json::to_value(&resp).expect("serialization");
        let w = json["warnings"]
            .as_array()
            .expect("warnings must be present");
        assert_eq!(w.len(), 1);
        assert!(w[0].as_str().unwrap().contains("implements"));
    }

    #[test]
    fn link_response_omits_warnings_when_empty() {
        let resp = baseline();
        let json = serde_json::to_value(&resp).expect("serialization");
        assert!(
            json.get("warnings").is_none(),
            "empty warnings must be omitted"
        );
    }
}
403}