semantic_search_cli/commands/
index.rs

1//! `index` subcommand
2
3use crate::{
4    Config,
5    util::{Database, Record, hash_file, iter_files, prompt},
6};
7use anyhow::{Context, Result};
8use argh::FromArgs;
9use log::{debug, info, warn};
10use semantic_search::ApiClient;
11
// Command-line arguments of the `index` subcommand, parsed through `argh`'s
// `FromArgs` derive.
// NOTE(review): argh appears to reuse the `///` doc comments below as the
// `--help` descriptions, so treat them as user-facing text - confirm before
// rewording them.
// `Index::execute` refuses to run when both `-y` and `-r` are given.
/// generate index of the files
#[derive(FromArgs, PartialEq, Eq, Debug)]
#[argh(subcommand, name = "index", help_triggers("-h", "--help"))]
pub struct Index {
    // When set, `execute` never calls `prompt`: new files are labeled with
    // their file stem and changed files keep their stored label and embedding.
    /// skip prompting for labels and use filename or existing label
    #[argh(switch, short = 'y')]
    pub yes: bool,
    // When set, a file whose hash changed keeps its stored label, and the
    // embedding is recomputed from that label.
    /// re-embedding files that hash has changed, useful when you edited the labels externally and conveyed the changes by changing the hash
    #[argh(switch, short = 'r')]
    pub re_embed: bool,
}
23
/// Summary of the index operation.
///
/// Returned by [`Index::execute`]; every counter starts at zero
/// (via `Default`) and is filled in during a single run.
#[derive(Debug, Default)]
pub struct IndexSummary {
    /// Number of changed files, i.e. files that already had a record in the
    /// database but whose freshly computed hash differs from the stored one.
    pub changed: usize,
    /// Number of new files, i.e. files for which the database had no record.
    pub new: usize,
    /// Number of deleted files, as reported by the database clean-up step
    /// that runs before the files are scanned.
    pub deleted: usize,
}
34
35impl Index {
36    /// Index files.
37    #[allow(clippy::future_not_send, reason = "Main function")]
38    pub async fn execute(&self, config: Config) -> Result<IndexSummary> {
39        // The option `yes` and `re_embed` should not be used together
40        if self.yes && self.re_embed {
41            anyhow::bail!("Options -y and -r should not be used together");
42        }
43        let mut db = Database::open(".sense/index.db3", false)
44            .await
45            .with_context(|| "Failed to open database")?;
46        let mut summary = IndexSummary::default();
47        let api = ApiClient::new(&config.api.key, config.api.model)?;
48        let cwd = std::env::current_dir()?.canonicalize()?;
49        summary.deleted = db.clean(&cwd).await?;
50        let files = iter_files(&cwd, &cwd);
51
52        // For all files, calculate hash and write to database
53        for (path, relative) in files {
54            let hash = hash_file(&path)?;
55            let relative = relative.to_string();
56            let existing = db.get(&relative).await?;
57
58            // Get updated record
59            let record = if let Some(mut record) = existing {
60                let hash_changed = record.file_hash != hash;
61                // Warn if the hash has changed
62                if hash_changed {
63                    summary.changed += 1;
64                    debug!("[CHANGED] {relative}: {} -> {hash}", record.file_hash);
65                    warn!("Hash of {relative} has changed, consider relabeling");
66                    record.file_hash = hash;
67                    record.file_id = None; // Reset file_id
68
69                    if self.re_embed {
70                        // Re-embed existing label
71                        info!("Re-embedding {relative}");
72                        record.embedding = api.embed(&record.label).await?.into();
73                    } else if !self.yes {
74                        // Prompt for label
75                        println!("Existing label: {}", record.label);
76                        let label = prompt(&format!("Label for {relative} (empty to keep): "))?;
77                        if label.is_empty() {
78                            println!("Label kept as: {}", record.label);
79                        } else {
80                            record.label = label;
81                            println!("Label updated to: {}", record.label);
82                            record.embedding = api.embed(&relative).await?.into();
83                        }
84                    } else {
85                        // Do nothing if `yes` is set - keep the existing label and embedding
86                        info!("Skipping {relative}");
87                    }
88                } else {
89                    // Nothing changed
90                    debug!("[SAME] {relative}: {hash}");
91                    continue; // Skip to next file - this should improve performance
92                }
93                // Reuse the record
94                record
95            } else {
96                summary.new += 1;
97                debug!("[NEW] {hash}: {relative}");
98                warn!("New file: {relative}, consider labeling");
99
100                let (label, embedding) = if self.yes {
101                    // Use filename as label
102                    let label = path.file_stem().unwrap().to_string_lossy();
103                    (label.to_string(), api.embed(&relative).await?.into())
104                } else {
105                    let label = prompt(&format!("Label for {relative} (empty to use filename): "))?;
106                    if label.is_empty() {
107                        // Use filename as label
108                        let label = path.file_stem().unwrap().to_string_lossy();
109                        (label.to_string(), api.embed(&relative).await?.into())
110                    } else {
111                        let embedding = api.embed(&relative).await?;
112                        (label, embedding.into())
113                    }
114                };
115                Record {
116                    file_path: relative,
117                    file_hash: hash,
118                    file_id: None,
119                    label,
120                    embedding,
121                }
122            };
123
124            db.insert(record).await?;
125        }
126
127        Ok(summary)
128    }
129}