liboxen/constants.rs
1//! Constants used throughout the codebase
2//!
3
4use crate::core::versions::MinOxenVersion;
5
6/// Rust library version (taken from `CARGO_PKG_VERSION` at compile time)
7pub const OXEN_VERSION: &str = env!("CARGO_PKG_VERSION");
8
9/// # Filenames and dirs
10/// .oxen is the name of the hidden directory where all our data lives
11pub const OXEN_HIDDEN_DIR: &str = ".oxen";
12/// Folder name for oxen home within `.cache`, `.config`, etc.
13pub const OXEN: &str = "oxen";
14/// ~/.config/oxen holds config files
15pub const CONFIG_DIR: &str = ".config";
16/// .oxenignore is the name of the file that contains the ignore patterns
17pub const OXEN_IGNORE_FILE: &str = ".oxenignore";
18/// Root path for repositories
19pub const ROOT_PATH: &str = "/";
20/// Config file for the repository
21pub const REPO_CONFIG_FILENAME: &str = "config.toml";
22/// HEAD file holds onto where the head commit is (commit_id or branch name)
23pub const HEAD_FILE: &str = "HEAD";
24/// refs/ is a key-value store of branch names to commit ids
25pub const REFS_DIR: &str = "refs";
26/// history/ dir is a list of directories named after commit ids
27pub const HISTORY_DIR: &str = "history";
28/// commits/ is a key-value database of commit ids to commit objects
29pub const COMMITS_DIR: &str = "commits";
30/// prefix for the cached stats dirs
31pub const CACHE_DIR: &str = "cache";
32/// prefix for the commit count dirs (lives under the `cache` prefix)
33pub const COMMIT_COUNT_DIR: &str = "cache/commit_count";
34/// name of the schema db
35pub const SCHEMAS_DIR: &str = "schemas";
36/// schemas node in merkle tree
37pub const SCHEMAS_TREE_PREFIX: &str = ".oxen";
38/// name of dir for locking branches during push
39pub const BRANCH_LOCKS_DIR: &str = "locks";
40/// name of file for locking repository during push
41pub const REPOSITORY_LOCK_FILE: &str = "LOCK";
42/// prefix for the commit rows
43pub const ROWS_DIR: &str = "rows";
44/// prefix for the commit entry files
45pub const FILES_DIR: &str = "files";
46/// prefix for the cached dataframes
47pub const DATA_FRAMES_DIR: &str = "data_frames";
48/// prefix for the data frame status dirs (NOTE(review): wording inferred from the name; the previous comment duplicated `DATA_FRAMES_DIR`'s — confirm)
49pub const DATA_FRAME_STATUS_DIR: &str = "data_frame_status";
50/// prefix for the commit entry dirs
51pub const DIRS_DIR: &str = "dirs";
52/// prefix for a commit dir => hash mapping
53pub const DIR_HASHES_DIR: &str = "dir_hashes";
54/// prefix for the commit merkle tree db
55pub const TREE_DIR: &str = "tree";
56/// prefix for the commit merkle tree node dbs
57pub const NODES_DIR: &str = "nodes";
58/// prefix for cached compare dfs
59pub const COMPARES_DIR: &str = "compares";
60/// prefix for the left commit pointer in cached compares
61pub const LEFT_COMPARE_COMMIT: &str = "LEFT";
62/// prefix for the right commit pointer in cached compares
63pub const RIGHT_COMPARE_COMMIT: &str = "RIGHT";
64/// prefix for the stats dir
65pub const STATS_DIR: &str = "stats";
66/// prefix for the staged dirs
67pub const STAGED_DIR: &str = "staged";
68/// Name of the table in the duckdb db used for remote staging
69pub const TABLE_NAME: &str = "df";
70/// Names of Oxen's internal columns: id, diff status, row id, and diff hash
71pub const OXEN_COLS: [&str; 4] = [OXEN_ID_COL, DIFF_STATUS_COL, OXEN_ROW_ID_COL, DIFF_HASH_COL];
72/// Oxen's internal columns to exclude from dfs: the four `OXEN_COLS` entries plus the eval status, error, and duration columns
73pub const EXCLUDE_OXEN_COLS: [&str; 7] = [
74 OXEN_ID_COL,
75 DIFF_STATUS_COL,
76 OXEN_ROW_ID_COL,
77 DIFF_HASH_COL,
78 EVAL_STATUS_COL,
79 EVAL_ERROR_COL,
80 EVAL_DURATION_COL,
81];
/// Oxen's internal row id column in duckdb remote staging tables
82pub const OXEN_ROW_ID_COL: &str = "_oxen_row_id";
83/// Oxen's internal id column in duckdb remote staging tables
84pub const OXEN_ID_COL: &str = "_oxen_id";
85/// Name of the folder of the cache dir in which dfs are indexed as duckdbs
86pub const DUCKDB_CACHE_DIR: &str = "duckdb";
87/// Default name for duckdb table used for indexing dataframes
88pub const DUCKDB_DF_TABLE_NAME: &str = "df";
89/// Max number of rows to query from a dataframe
90pub const MAX_QUERYABLE_ROWS: usize = 5_000_000;
91/// prefix for the sync status dirs to tell if commits are synced locally
92pub const SYNC_STATUS_DIR: &str = "sync_status";
93/// Flag for whether the repository was cloned in a shallow fashion
94pub const SHALLOW_FLAG: &str = "SHALLOW";
95/// prefix for the commit indices
96pub const INDICES_DIR: &str = "indices";
97/// prefix for the schema fields that are indexed
98pub const FIELDS_DIR: &str = "fields";
99/// versions/ is where all the versions are stored so that we can quickly swap between versions of the file
100pub const VERSIONS_DIR: &str = "versions";
101/// chunks/ is where individual file chunks are stored
102pub const CHUNKS_DIR: &str = "chunks";
103/// objects/ stores pointers to data files and sub-tree structures for efficient commit representations
104pub const OBJECTS_DIR: &str = "objects";
105/// Storage of file node representations in objects dir
106pub const OBJECT_FILES_DIR: &str = "files";
107/// Storage of dir node representations in objects dir
108pub const OBJECT_DIRS_DIR: &str = "dirs";
109/// Storage of hash-bucketed vnode representations in objects dir
110pub const OBJECT_VNODES_DIR: &str = "vnodes";
111/// Storage of schema node representations in objects dir
112pub const OBJECT_SCHEMAS_DIR: &str = "schemas";
113/// File name for files stored in versions directory (>0.8.4). (Was commit id <= 0.8.4)
114pub const VERSION_FILE_NAME: &str = "data";
115/// File name for chunks of files stored in versions directory
116pub const VERSION_CHUNK_FILE_NAME: &str = "chunk";
117/// Chunks directory for version files
118pub const VERSION_CHUNKS_DIR: &str = "chunks";
119/// merge/ is where any merge conflicts are stored so that we can get rid of them
120pub const MERGE_DIR: &str = "merge";
121/// mods/ is where we can stage appends, modifications, deletions to files to be merged later
122pub const MODS_DIR: &str = "mods";
123/// workspaces/ is where we can make remote changes without having to clone locally
124pub const WORKSPACES_DIR: &str = "workspaces";
125/// workspace commit id (NOTE(review): the name and value suggest a workspace config file/key rather than a commit id — confirm)
126pub const WORKSPACE_CONFIG: &str = "WORKSPACE_CONFIG";
127/// File name: data.arrow
128pub const DATA_ARROW_FILE: &str = "data.arrow";
129
130/// MERGE_HEAD file: if we have merge conflicts we write to MERGE_HEAD and ORIG_HEAD to keep track of the parents
131pub const MERGE_HEAD_FILE: &str = "MERGE_HEAD";
132/// ORIG_HEAD file: if we have merge conflicts we write to MERGE_HEAD and ORIG_HEAD to keep track of the parents
133pub const ORIG_HEAD_FILE: &str = "ORIG_HEAD";
134
135/// Key for content being valid
136pub const CONTENT_IS_VALID: &str = "CONTENT_IS_VALID";
137/// Key for whether something is synced
138pub const IS_SYNCED: &str = "IS_SYNCED";
139
140/// Default branch name: main
141pub const DEFAULT_BRANCH_NAME: &str = "main";
142/// Default remote name: origin
143pub const DEFAULT_REMOTE_NAME: &str = "origin";
144/// Default remote host: hub.oxen.ai
145pub const DEFAULT_HOST: &str = "hub.oxen.ai";
146/// Default remote scheme: https
147pub const DEFAULT_SCHEME: &str = "https";
148
149/// Default namespace: ox
150pub const DEFAULT_NAMESPACE: &str = "ox";
151
152/// Initial commit message
153pub const INITIAL_COMMIT_MSG: &str = "Initialized Repo 🐂";
154
155/// Internal column name used when performing computation
156pub const ROW_NUM_COL_NAME: &str = "_row_num";
157/// Internal column name used when performing computation
158pub const ROW_HASH_COL_NAME: &str = "_row_hash";
159/// Internal column name used when performing computation
160pub const FILE_ROW_NUM_COL_NAME: &str = "_file_row_num";
161/// Internal column name used when performing computation
162pub const TARGETS_HASH_COL: &str = "_targets_hash";
163/// Internal column name used when performing computation
164pub const KEYS_HASH_COL: &str = "_keys_hash";
165/// Internal column name used when performing computation (listed in `OXEN_COLS` and `EXCLUDE_OXEN_COLS`)
166pub const DIFF_STATUS_COL: &str = "_oxen_diff_status";
167/// Internal column name used when performing computation (listed in `OXEN_COLS` and `EXCLUDE_OXEN_COLS`)
168pub const DIFF_HASH_COL: &str = "_oxen_diff_hash";
169
170/// Internal name for evaluation status (listed in `EXCLUDE_OXEN_COLS`)
171pub const EVAL_STATUS_COL: &str = "_oxen_eval_status";
172/// Internal name for evaluation error (listed in `EXCLUDE_OXEN_COLS`)
173pub const EVAL_ERROR_COL: &str = "_oxen_eval_error";
174/// Internal name for evaluation duration (listed in `EXCLUDE_OXEN_COLS`)
175pub const EVAL_DURATION_COL: &str = "_oxen_eval_duration";
176
177// Data transfer
178/// Average chunk size of ~10mb when chunking and sending data.
179/// This is the default; `chunk_size()` lets `OXEN_AVG_CHUNK_SIZE` override it at runtime.
180pub const AVG_CHUNK_SIZE: u64 = 1024 * 1024 * 10;
181/// Allow up to N concurrent upload tasks
182/// (N = 30)
183pub const MAX_CONCURRENT_UPLOADS: usize = 30;
184/// Limit zip file downloads to batches of size N
185/// (N = 1024 * 1024 * 1024)
186pub const MAX_ZIP_DOWNLOAD_SIZE: u64 = 1024 * 1024 * 1024; // 1 GB
187/// Retry and back off of upload tasks N times.
188/// Test builds use a single retry; `max_retries()` lets `OXEN_NUM_RETRIES` override this at runtime.
189#[cfg(test)]
190pub const NUM_HTTP_RETRIES: u64 = 1;
/// Retry and back off of upload tasks N times (non-test builds).
/// `max_retries()` lets `OXEN_NUM_RETRIES` override this at runtime.
191#[cfg(not(test))]
192pub const NUM_HTTP_RETRIES: u64 = 5;
193/// Default number of workers
194pub const DEFAULT_NUM_WORKERS: usize = 8;
195/// Default timeout for HTTP requests, in seconds; `timeout()` lets `OXEN_TIMEOUT_SECS` override it at runtime
196pub const DEFAULT_TIMEOUT_SECS: u64 = 600;
197/// Default vnode size
198pub const DEFAULT_VNODE_SIZE: u64 = 10_000;
199
200/// Default pagination page size of 100
201pub const DEFAULT_PAGE_SIZE: usize = 100;
202/// Default pagination page number of 1
203pub const DEFAULT_PAGE_NUM: usize = 1;
204
205/// Data Types
206pub const TEXT: &str = "text";
207pub const IMAGE: &str = "image";
208pub const VIDEO: &str = "video";
209pub const AUDIO: &str = "audio";
210pub const TABULAR: &str = "tabular";
211pub const BINARY: &str = "binary";
212pub const DIR: &str = "dir";
213
214/// Minimum allowable oxen version to push or pull data
215pub const MIN_OXEN_VERSION: MinOxenVersion = MinOxenVersion::LATEST;
216
217/// Filepath used to track repo and server-level migration status
218pub const LAST_MIGRATION_FILE: &str = "last_migration.txt";
219
220/// Constraints for diff and compare size
221pub const MAX_DISPLAY_DIRS: usize = 10;
222
223/// Oxen stack size: 16_777_216 = 16 * 1024 * 1024 (presumably bytes, i.e. 16 MiB — confirm against usage)
224pub const OXEN_STACK_SIZE: usize = 16_777_216;
225
226// Parse the maximum number of retries allowed on upload from environment variable
227pub fn max_retries() -> usize {
228 if let Ok(max_retries) = std::env::var("OXEN_NUM_RETRIES") {
229 // If the environment variable is set, use that
230 if let Ok(max_retries) = max_retries.parse::<usize>() {
231 max_retries
232 } else {
233 // If parsing failed, fall back to default
234 NUM_HTTP_RETRIES.try_into().unwrap()
235 }
236 } else {
237 // Environment variable not set, use default
238 NUM_HTTP_RETRIES.try_into().unwrap()
239 }
240}
241
242// Parse the timeout for http requests from environment variable
243pub fn timeout() -> u64 {
244 if let Ok(timeout) = std::env::var("OXEN_TIMEOUT_SECS") {
245 // If the environment variable is set, use that
246 if let Ok(timeout) = timeout.parse::<u64>() {
247 timeout
248 } else {
249 // If parsing failed, fall back to default
250 DEFAULT_TIMEOUT_SECS
251 }
252 } else {
253 // Environment variable not set, use default
254 DEFAULT_TIMEOUT_SECS
255 }
256}
257
258// Parse the timeout for http requests from environment variable
259pub fn chunk_size() -> u64 {
260 if let Ok(chunk_size) = std::env::var("OXEN_AVG_CHUNK_SIZE") {
261 // If the environment variable is set, use that
262 if let Ok(chunk_size) = chunk_size.parse::<u64>() {
263 chunk_size
264 } else {
265 // If parsing failed, fall back to default
266 AVG_CHUNK_SIZE
267 }
268 } else {
269 // Environment variable not set, use default
270 AVG_CHUNK_SIZE
271 }
272}