Skip to main content

sqlite_graphrag/commands/
reclassify.rs

1//! Handler for the `reclassify` CLI subcommand (GAP-18).
2//!
3//! Reclassifies one entity (single mode) or a whole group of entities (batch
4//! mode) by updating the `type` column in the `entities` table.
5//!
6//! Single mode: `--name <entity>` changes the type of one entity.
7//! Batch mode: `--from-type <old> --to-type <new> --batch` changes every
8//! entity in the namespace that currently has `<old>` as its type.
9
10use crate::entity_type::EntityType;
11use crate::errors::AppError;
12use crate::i18n::errors_msg;
13use crate::output::{self, OutputFormat};
14use crate::paths::AppPaths;
15use crate::storage::connection::open_rw;
16use crate::storage::entities;
17use rusqlite::params;
18use serde::Serialize;
19
20#[derive(clap::Args)]
21#[command(after_long_help = "EXAMPLES:\n  \
22    # Reclassify a single entity from its current type to 'tool'\n  \
23    sqlite-graphrag reclassify --name tokio-runtime --new-type tool\n\n  \
24    # Reclassify all 'concept' entities to 'tool' in one shot (batch)\n  \
25    sqlite-graphrag reclassify --from-type concept --to-type tool --batch\n\n  \
26    # Reclassify in a specific namespace\n  \
27    sqlite-graphrag reclassify --name alice --new-type person --namespace my-project\n\n\
28NOTE:\n  \
29    Single mode requires --name and --new-type.\n  \
30    Batch mode requires --from-type, --to-type and --batch.\n  \
31    Providing --name together with --batch is an error.\n\n\
32VALID ENTITY TYPES:\n  \
33    project, tool, person, file, concept, incident, decision,\n  \
34    memory, dashboard, issue_tracker, organization, location, date")]
35pub struct ReclassifyArgs {
36    /// Entity name to reclassify (single mode). Mutually exclusive with --from-type + --batch.
37    #[arg(long, conflicts_with_all = ["from_type", "batch"])]
38    pub name: Option<String>,
39    /// New entity type for single mode.
40    #[arg(long, value_enum, value_name = "TYPE")]
41    pub new_type: Option<EntityType>,
42    /// New description for the entity (single mode only). Ignored in batch mode.
43    #[arg(long, value_name = "TEXT")]
44    pub description: Option<String>,
45    /// Current entity type to match in batch mode. Requires --to-type and --batch.
46    #[arg(
47        long,
48        value_enum,
49        value_name = "TYPE",
50        requires = "to_type",
51        requires = "batch"
52    )]
53    pub from_type: Option<EntityType>,
54    /// New entity type to assign in batch mode. Requires --from-type and --batch.
55    #[arg(long, value_enum, value_name = "TYPE", requires = "from_type")]
56    pub to_type: Option<EntityType>,
57    /// Enable batch reclassification (--from-type to --to-type). Requires --from-type and --to-type.
58    #[arg(long, default_value_t = false, requires = "from_type")]
59    pub batch: bool,
60    #[arg(long)]
61    pub namespace: Option<String>,
62    #[arg(long, value_enum, default_value = "json")]
63    pub format: OutputFormat,
64    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
65    pub json: bool,
66    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
67    pub db: Option<String>,
68}
69
70#[derive(Serialize)]
71struct ReclassifyResponse {
72    action: String,
73    count: usize,
74    #[serde(skip_serializing_if = "Option::is_none")]
75    description_updated: Option<bool>,
76    namespace: String,
77    /// Total execution time in milliseconds from handler start to serialisation.
78    elapsed_ms: u64,
79}
80
81pub fn run(args: ReclassifyArgs) -> Result<(), AppError> {
82    let inicio = std::time::Instant::now();
83    let namespace = crate::namespace::resolve_namespace(args.namespace.as_deref())?;
84    let paths = AppPaths::resolve(args.db.as_deref())?;
85
86    crate::storage::connection::ensure_db_ready(&paths)?;
87
88    let mut conn = open_rw(&paths.db)?;
89
90    let count = if args.batch {
91        // Batch mode: --from-type + --to-type + --batch
92        let from_type = args.from_type.ok_or_else(|| {
93            AppError::Validation("--from-type is required in batch mode".to_string())
94        })?;
95        let to_type = args.to_type.ok_or_else(|| {
96            AppError::Validation("--to-type is required in batch mode".to_string())
97        })?;
98
99        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
100        let affected = tx.execute(
101            "UPDATE entities SET type = ?1, updated_at = unixepoch()
102             WHERE type = ?2 AND namespace = ?3",
103            params![to_type.as_str(), from_type.as_str(), namespace],
104        )?;
105        tx.commit()?;
106        if affected == 0 {
107            tracing::warn!(
108                from_type = from_type.as_str(),
109                namespace = %namespace,
110                "reclassify batch matched zero entities — verify --from-type value exists"
111            );
112        }
113        affected
114    } else {
115        // Single mode: --name + --new-type
116        let entity_name = args
117            .name
118            .as_deref()
119            .ok_or_else(|| AppError::Validation("--name is required in single mode".to_string()))?;
120        let new_type = args.new_type.ok_or_else(|| {
121            AppError::Validation("--new-type is required in single mode".to_string())
122        })?;
123
124        // Verify entity exists.
125        entities::find_entity_id(&conn, &namespace, entity_name)?.ok_or_else(|| {
126            AppError::NotFound(errors_msg::entity_not_found(entity_name, &namespace))
127        })?;
128
129        let tx = conn.transaction_with_behavior(rusqlite::TransactionBehavior::Immediate)?;
130        let affected = tx.execute(
131            "UPDATE entities SET type = ?1, updated_at = unixepoch()
132             WHERE name = ?2 AND namespace = ?3",
133            params![new_type.as_str(), entity_name, namespace],
134        )?;
135        if let Some(ref desc) = args.description {
136            tx.execute(
137                "UPDATE entities SET description = ?1, updated_at = unixepoch()
138                 WHERE name = ?2 AND namespace = ?3",
139                params![desc, entity_name, namespace],
140            )?;
141        }
142        tx.commit()?;
143        affected
144    };
145
146    conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);")?;
147
148    let response = ReclassifyResponse {
149        action: "reclassified".to_string(),
150        count,
151        description_updated: if args.description.is_some() {
152            Some(true)
153        } else {
154            None
155        },
156        namespace: namespace.clone(),
157        elapsed_ms: inicio.elapsed().as_millis() as u64,
158    };
159
160    match args.format {
161        OutputFormat::Json => output::emit_json(&response)?,
162        OutputFormat::Text | OutputFormat::Markdown => {
163            output::emit_text(&format!(
164                "reclassified: {} entities [{}]",
165                response.count, response.namespace
166            ));
167        }
168    }
169
170    Ok(())
171}
172
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a response carrying the fixed `"reclassified"` action.
    fn make_response(
        count: usize,
        description_updated: Option<bool>,
        namespace: &str,
        elapsed_ms: u64,
    ) -> ReclassifyResponse {
        ReclassifyResponse {
            action: String::from("reclassified"),
            count,
            description_updated,
            namespace: String::from(namespace),
            elapsed_ms,
        }
    }

    /// Serialises a response into a JSON value, panicking if serialisation fails.
    fn as_json(resp: &ReclassifyResponse) -> serde_json::Value {
        serde_json::to_value(resp).expect("serialization failed")
    }

    #[test]
    fn reclassify_response_serializes_all_fields() {
        let value = as_json(&make_response(5, None, "global", 12));

        assert_eq!(value["action"], "reclassified");
        assert_eq!(value["count"], 5);
        assert_eq!(value["namespace"], "global");
        assert!(value["elapsed_ms"].is_number());
        // `None` must be skipped entirely rather than serialised as null.
        assert!(value.get("description_updated").is_none());
    }

    #[test]
    fn reclassify_response_count_zero_is_valid() {
        let value = as_json(&make_response(0, None, "my-project", 3));

        assert_eq!(value["count"], 0);
        assert_eq!(value["action"], "reclassified");
    }

    #[test]
    fn reclassify_response_action_is_reclassified() {
        let resp = make_response(1, None, "ns", 1);

        assert_eq!(resp.action, "reclassified");
    }

    #[test]
    fn reclassify_response_description_updated_present_when_set() {
        let value = as_json(&make_response(1, Some(true), "global", 2));

        assert_eq!(value["description_updated"], true);
    }
}