Skip to main content

codesearch/
constants.rs

1//! Central constants for codesearch configuration
2//!
3//! All string literals for paths, filenames, and configuration should be defined here
4//! to avoid duplication and ensure consistency across the codebase.
5
6use std::path::PathBuf;
7use std::sync::atomic::{AtomicBool, Ordering};
8
/// Global shutdown flag, set by the CTRL-C handler.
///
/// This uses a raw `AtomicBool` instead of relying solely on `CancellationToken`
/// because the indexing pipeline is largely synchronous (ONNX inference, file I/O)
/// and the flag must be visible from any thread without async polling.
///
/// Checked between files and between embedding mini-batches so that CTRL-C
/// is honoured within a few seconds even during heavy CPU work.
///
/// Starts out `false`. Within this module it is read by `is_shutdown_requested`
/// (and, through it, by `check_shutdown`).
pub static SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);
18
19/// Check whether a graceful shutdown has been requested (CTRL-C).
20#[inline]
21pub fn is_shutdown_requested() -> bool {
22    SHUTDOWN_REQUESTED.load(Ordering::SeqCst)
23}
24
25/// Check whether a graceful shutdown has been requested via either
26/// the global AtomicBool (OS signal) or a CancellationToken.
27///
28/// This helper consolidates the two shutdown mechanisms used throughout the codebase
29/// to reduce duplication and improve maintainability.
30#[inline]
31pub fn check_shutdown(cancel_token: &tokio_util::sync::CancellationToken) -> bool {
32    is_shutdown_requested() || cancel_token.is_cancelled()
33}
34
/// Name of the database directory in project roots
pub const DB_DIR_NAME: &str = ".codesearch.db";

/// Name of the global config directory in user home
///
/// `get_global_models_cache_dir` joins this onto the home directory.
pub const CONFIG_DIR_NAME: &str = ".codesearch";

/// Name of the file metadata database
pub const FILE_META_DB_NAME: &str = "file_meta.json";

/// Subdirectory name for embedding models within the global config dir
///
/// Private to this module; used by `get_global_models_cache_dir` to build
/// the `<home>/.codesearch/models` path.
const MODELS_SUBDIR: &str = "models";

/// Log directory name within .codesearch.db
pub const LOG_DIR_NAME: &str = "logs";

/// Default log file name
pub const LOG_FILE_NAME: &str = "codesearch.log";

/// Default number of log files to retain
pub const DEFAULT_LOG_MAX_FILES: usize = 5;

/// Default log retention period in days
pub const DEFAULT_LOG_RETENTION_DAYS: u64 = 5;
58
59/// Get the global models cache directory (~/.codesearch/models/).
60///
61/// This centralizes embedding model downloads so they are shared across all
62/// databases instead of being duplicated per-project. The directory is created
63/// if it does not exist.
64///
65/// Falls back to a temp directory if the home directory cannot be determined.
66pub fn get_global_models_cache_dir() -> anyhow::Result<PathBuf> {
67    let base =
68        dirs::home_dir().ok_or_else(|| anyhow::anyhow!("Could not determine home directory"))?;
69
70    let models_dir = base.join(CONFIG_DIR_NAME).join(MODELS_SUBDIR);
71
72    if !models_dir.exists() {
73        std::fs::create_dir_all(&models_dir).map_err(|e| {
74            anyhow::anyhow!(
75                "Failed to create global models cache directory {}: {}",
76                models_dir.display(),
77                e
78            )
79        })?;
80    }
81
82    Ok(models_dir)
83}
84
/// Name of the repos configuration file
pub const REPOS_CONFIG_FILE: &str = "repos.json";

/// Default LMDB map size in megabytes (512MB).
///
/// This is the maximum virtual address space reserved for the memory-mapped database.
/// On Linux/macOS this is just an address space reservation (no physical RAM until data is written).
/// On Windows the file may be pre-allocated to this size, so keeping it small matters.
/// 512MB is sufficient for most codebases (~100k chunks × ~5KB = ~512MB).
/// Override with `CODESEARCH_LMDB_MAP_SIZE_MB` environment variable.
pub const DEFAULT_LMDB_MAP_SIZE_MB: usize = 512;

/// Default embedding cache memory limit in MB.
///
/// The embedding cache stores recently computed embeddings in memory (Moka LRU cache)
/// to avoid re-computing them during incremental indexing. This is real physical memory.
/// 100MB is sufficient since files are processed sequentially during indexing.
/// Override with `CODESEARCH_CACHE_MAX_MEMORY` environment variable.
pub const DEFAULT_CACHE_MAX_MEMORY_MB: usize = 100;

/// File watcher debounce time in milliseconds
pub const DEFAULT_FSW_DEBOUNCE_MS: u64 = 2000;

/// Lock file name to indicate an active writer instance
/// This prevents multiple processes from writing to the same database
pub const WRITER_LOCK_FILE: &str = ".writer.lock";
111
/// Directories and files that should always be excluded from indexing
/// These are added to both .gitignore and .codesearchignore automatically
///
/// Entries are bare names (no path separators or glob syntax), grouped by
/// the ecosystem or tool that produces them.
// NOTE(review): the first two entries repeat the values of `CONFIG_DIR_NAME`
// and `DB_DIR_NAME` as literals; keep them in sync if those constants change.
pub const ALWAYS_EXCLUDED: &[&str] = &[
    // Codesearch databases
    ".codesearch",
    ".codesearch.db",
    ".codesearch.dbs",
    // Fastembed cache
    "fastembed_cache",
    // Version control
    ".git",
    ".svn",
    ".hg",
    // Build artifacts
    "node_modules",
    "target",
    "dist",
    "build",
    "out",
    // Python
    "__pycache__",
    ".pytest_cache",
    ".tox",
    "venv",
    ".venv",
    // Ruby
    "vendor",
    ".bundle",
    // Java
    ".gradle",
    ".m2",
    // IDE
    ".idea",
    ".vscode",
    ".vs",
    // Other
    "coverage",
    ".nyc_output",
    ".cache",
];