semantic_search_cli/commands/
index.rs

1//! `index` subcommand
2
3use crate::{
4    util::{hash_file, iter_files, prompt, Database, Record},
5    Config,
6};
7use anyhow::{Context, Result};
8use argh::FromArgs;
9use log::{debug, info, warn};
10use semantic_search::ApiClient;
11
// Command-line arguments of the `index` subcommand.
//
// NOTE: the `///` comments on this struct and on its fields are read by argh's
// `FromArgs` derive and shown to the user as help text, so treat them as
// user-facing strings rather than internal documentation.
/// generate index of the files
#[derive(FromArgs, PartialEq, Eq, Debug)]
#[argh(subcommand, name = "index", help_triggers("-h", "--help"))]
pub struct Index {
    // `-y`: never prompt. `Index::execute` rejects `-y` combined with `-r`.
    /// skip prompting for labels and use filename or existing label
    #[argh(switch, short = 'y')]
    pub yes: bool,
    // `-r`: for files whose hash changed, recompute the embedding from the
    // label already stored in the database instead of prompting.
    /// re-embedding files that hash has changed, useful when you edited the labels externally and conveyed the changes by changing the hash
    #[argh(switch, short = 'r')]
    pub re_embed: bool,
}
23
/// Counters describing what a single `index` run found.
///
/// All counters start at zero (see the `Default` derive) and are filled in by
/// `Index::execute`.
#[derive(Default, Debug)]
pub struct IndexSummary {
    /// How many already-indexed files had a different hash than the stored one.
    pub changed: usize,
    /// How many files were not present in the index before this run.
    pub new: usize,
    /// How many index entries were removed during the clean-up step.
    pub deleted: usize,
}
34
35impl Index {
36    /// Index files.
37    #[allow(clippy::future_not_send, reason = "Main function")]
38    pub async fn execute(&self, config: Config) -> Result<IndexSummary> {
39        // The option `yes` and `re_embed` should not be used together
40        if self.yes && self.re_embed {
41            anyhow::bail!("Options -y and -r should not be used together");
42        }
43        let mut db = Database::open(".sense/index.db3", false)
44            .await
45            .with_context(|| "Failed to open database")?;
46        let mut summary = IndexSummary::default();
47        let api = ApiClient::new(&config.api.key, config.api.model)?;
48        let cwd = std::env::current_dir()?.canonicalize()?;
49        summary.deleted = db.clean(&cwd).await?;
50        let files = iter_files(&cwd, &cwd);
51
52        // For all files, calculate hash and write to database
53        for (path, relative) in files {
54            let hash = hash_file(&path)?;
55            let relative = relative.to_string();
56            let existing = db.get(&relative).await?;
57
58            let record = if let Some(mut record) = existing {
59                let hash_changed = record.file_hash != hash;
60                // Warn if the hash has changed
61                if hash_changed {
62                    summary.changed += 1;
63                    debug!("[CHANGED] {relative}: {} -> {hash}", record.file_hash);
64                    warn!("Hash of {relative} has changed, consider relabeling");
65                    record.file_hash = hash;
66                    record.file_id = None; // Reset file_id
67
68                    if self.re_embed {
69                        // Re-embed existing label
70                        info!("Re-embedding {relative}");
71                        record.embedding = api.embed(&record.label).await?.into();
72                    } else if !self.yes {
73                        // Prompt for label
74                        println!("Existing label: {}", record.label);
75                        let label = prompt(&format!("Label for {relative} (empty to keep): "))?;
76                        if label.is_empty() {
77                            println!("Label kept as: {}", record.label);
78                        } else {
79                            record.label = label;
80                            println!("Label updated to: {}", record.label);
81                            record.embedding = api.embed(&relative).await?.into();
82                        }
83                    } else {
84                        // Do nothing if `yes` is set - keep the existing label and embedding
85                        info!("Skipping {relative}");
86                    }
87                } else {
88                    // Nothing changed
89                    debug!("[SAME] {relative}: {hash}");
90                }
91                // Reuse the record
92                record
93            } else {
94                summary.new += 1;
95                debug!("[NEW] {hash}: {relative}");
96                warn!("New file: {relative}, consider labeling");
97
98                let (label, embedding) = if self.yes {
99                    // Use filename as label
100                    let label = path.file_stem().unwrap().to_string_lossy();
101                    (label.to_string(), api.embed(&relative).await?.into())
102                } else {
103                    let label = prompt(&format!("Label for {relative} (empty to use filename): "))?;
104                    if label.is_empty() {
105                        // Use filename as label
106                        let label = path.file_stem().unwrap().to_string_lossy();
107                        (label.to_string(), api.embed(&relative).await?.into())
108                    } else {
109                        let embedding = api.embed(&relative).await?;
110                        (label, embedding.into())
111                    }
112                };
113                Record {
114                    file_path: relative,
115                    file_hash: hash,
116                    file_id: None,
117                    label,
118                    embedding,
119                }
120            };
121
122            db.insert(record).await?;
123        }
124
125        Ok(summary)
126    }
127}