use anyhow::Result;
use clap::{Parser, Subcommand};
use std::path::{Path, PathBuf};
use tracing::Level;
use walkdir::WalkDir;
use rust_memex::{ChunkerKind, NamespaceSecurityConfig, SchemaVersion, ServerConfig, path_utils};
// Default port for the local dashboard HTTP server (used by the `Dashboard`
// subcommand when no `--port` is given).
pub const DEFAULT_DASHBOARD_PORT: u16 = 8987;
// Default port for the agent-facing HTTP/SSE daemon (used by the `Sse`
// subcommand when no `--port` is given).
pub const DEFAULT_SSE_PORT: u16 = 8997;
/// Standard config discovery locations (in priority order)
// Entries use `~` and are tilde-expanded at lookup time by `discover_config`.
const CONFIG_SEARCH_PATHS: &[&str] = &[
    "~/.rmcp-servers/rust-memex/config.toml",
    "~/.config/rust-memex/config.toml",
    "~/.rmcp_servers/rust_memex/config.toml", // legacy underscore path
];
/// Discover config file from standard locations
fn discover_config() -> Option<String> {
// 1. Environment variable takes priority
if let Ok(path) = std::env::var("RUST_MEMEX_CONFIG") {
let expanded = shellexpand::tilde(&path).to_string();
if std::path::Path::new(&expanded).exists() {
return Some(path);
}
}
// 2. Check standard locations
for path in CONFIG_SEARCH_PATHS {
let expanded = shellexpand::tilde(path).to_string();
if std::path::Path::new(&expanded).exists() {
return Some(path.to_string());
}
}
None
}
/// Read and parse a TOML config file into a `FileConfig`.
///
/// The file is read through `path_utils::safe_read_to_string`; read failures
/// are wrapped with the offending path for context, while TOML parse errors
/// propagate via the normal `anyhow` conversion.
fn load_file_config(path: &str) -> Result<FileConfig> {
    let contents = match path_utils::safe_read_to_string(path) {
        // The canonicalized path returned alongside the contents is unused here.
        Ok((_canonical, text)) => text,
        Err(e) => return Err(anyhow::anyhow!("Cannot load config '{}': {}", path, e)),
    };
    let parsed = toml::from_str(&contents)?;
    Ok(parsed)
}
/// Load config from explicit path or discover from standard locations
fn load_or_discover_config(explicit_path: Option<&str>) -> Result<(FileConfig, Option<String>)> {
// Explicit path takes priority
if let Some(path) = explicit_path {
return Ok((load_file_config(path)?, Some(path.to_string())));
}
// Try to discover config
if let Some(discovered) = discover_config() {
return Ok((load_file_config(&discovered)?, Some(discovered)));
}
// No config found - use defaults
Ok((FileConfig::default(), None))
}
use crate::cli::config::*;
// Top-level CLI surface. clap derives the parser from this struct; note that
// the `///` doc comments below double as the user-facing `--help` text, so
// they are runtime output and must not be reworded casually.
#[derive(Parser, Debug)]
#[command(
    name = "rust-memex",
    bin_name = "rust-memex",
    author,
    version,
    about = "rust-memex: custom Rust MCP kernel for RAG and long-term memory.\nCanonical entrypoint for stdio (native MCP) and HTTP/SSE (multi-agent) transports.",
    long_about = "rust-memex is a custom Rust MCP kernel providing RAG and long-term memory capabilities to AI agents via LanceDB.\n\nIt exposes two explicit transport modes from a single canonical surface:\n1. stdio (Standard MCP): Native MCP integration for local agents.\n2. HTTP/SSE (Multi-Agent Daemon): Central daemon mode allowing concurrent AI agents to access the same memory pool over the network.\n\nrust-memex is the only supported binary name. The GitHub installer may also create rust_memex as a legacy compatibility symlink for older scripts."
)]
pub struct Cli {
    // Optional: defaults to `Serve` behavior when absent (handled by the caller).
    #[command(subcommand)]
    pub command: Option<Commands>,
    /// Optional config file (TOML) to load settings from; CLI flags override file when set.
    #[arg(long, global = true)]
    pub config: Option<String>,
    // The next two flags are parsed but ignored; they exist so old invocations
    // don't fail. `hide = true` keeps them out of `--help`.
    /// Legacy compatibility shim. Ignored at runtime.
    #[arg(long, value_parser = ["memory", "full"], global = true, hide = true)]
    pub mode: Option<String>,
    /// Legacy compatibility shim. Ignored at runtime.
    #[arg(long, global = true, hide = true)]
    pub features: Option<String>,
    /// Cache size in MB
    #[arg(long, global = true)]
    pub cache_mb: Option<usize>,
    /// Path for embedded vector store (LanceDB)
    #[arg(long, global = true)]
    pub db_path: Option<String>,
    /// Max allowed request size in bytes for JSON-RPC framing
    #[arg(long, global = true)]
    pub max_request_bytes: Option<usize>,
    /// Log level
    #[arg(long, global = true)]
    pub log_level: Option<String>,
    /// Allowed paths for file access (whitelist). Can be specified multiple times.
    /// If not set, defaults to $HOME and current working directory.
    /// Supports ~ expansion and absolute paths.
    #[arg(long, global = true, action = clap::ArgAction::Append)]
    pub allowed_paths: Option<Vec<String>>,
    // --- Security / access-control flags ---
    /// Enable namespace token-based access control.
    /// When enabled, protected namespaces require a token for access.
    #[arg(long, global = true)]
    pub security_enabled: bool,
    /// Path to token store file for namespace access tokens.
    /// Defaults to ~/.rmcp-servers/rust-memex/tokens.json when security is enabled.
    #[arg(long, global = true)]
    pub token_store_path: Option<String>,
    // --- HTTP/SSE transport flags ---
    /// HTTP/SSE server port for multi-agent access.
    /// When set, starts an HTTP server alongside MCP stdio.
    /// Agents can query via HTTP instead of holding LanceDB lock directly.
    /// Example: --http-port 8997
    #[arg(long, global = true)]
    pub http_port: Option<u16>,
    /// Run HTTP server only, without MCP stdio.
    /// Use this for daemon mode where agents connect via HTTP.
    /// Requires --http-port to be set.
    #[arg(long, global = true)]
    pub http_only: bool,
    /// Migrate an older LanceDB schema at daemon startup instead of refusing to start.
    /// Default is fail-fast; run `rust-memex migrate-schema --db-path <path>` for manual control.
    #[arg(long, global = true)]
    pub auto_migrate: bool,
    /// Bearer token for authenticating HTTP endpoints.
    /// API/SSE/MCP access stays Bearer even when dashboard OIDC is enabled.
    /// Can also be set via MEMEX_AUTH_TOKEN env var.
    #[arg(long, global = true)]
    pub auth_token: Option<String>,
    /// Bind address for the HTTP server. Defaults to 127.0.0.1 (localhost only).
    /// Use 0.0.0.0 to expose on all interfaces (requires --auth-token for safety).
    #[arg(long, global = true)]
    pub bind_address: Option<String>,
    /// Allowed CORS origins (comma-separated). If empty, defaults to same-origin
    /// when bound to non-localhost, or permissive when bound to localhost.
    #[arg(long, global = true)]
    pub cors_origins: Option<String>,
    /// Allow binding to non-loopback addresses without --auth-token.
    /// By default, binding to e.g. 0.0.0.0 without auth is a hard error.
    /// This flag downgrades it to a warning.
    #[arg(long, global = true)]
    pub allow_network_without_auth: bool,
    // NOTE(review): auth_mode is a stringly-typed flag constrained by
    // value_parser; enforcement of the listed modes happens elsewhere — the
    // "namespace-acl" value is documented as reserved, confirm before relying
    // on it.
    /// Auth enforcement mode for HTTP endpoints.
    /// - mutating-only (default): bearer required only on mutating + MCP routes
    /// - all-routes: bearer required on ALL routes
    /// - namespace-acl: reserved for Track C (namespace-level ACL)
    #[arg(long, global = true, default_value = "mutating-only",
        value_parser = ["mutating-only", "all-routes", "namespace-acl"])]
    pub auth_mode: String,
    /// Allow passing bearer token as ?token= query parameter on read GET endpoints.
    /// Disabled by default. Only effective when --auth-mode is all-routes.
    #[arg(long, global = true)]
    pub allow_query_token: bool,
}
#[derive(Subcommand, Debug)]
pub enum Commands {
/// Run the MCP server (default if no subcommand specified)
Serve,
/// Run the local dashboard server and open it in the default browser.
Dashboard {
/// Dashboard HTTP port (default: 8987)
#[arg(long, short = 'p')]
port: Option<u16>,
/// Do not open the dashboard in a browser after startup
#[arg(long)]
no_open: bool,
},
/// Run the HTTP/SSE daemon on the agent-facing port.
Sse {
/// HTTP/SSE port (default: 8997)
#[arg(long, short = 'p')]
port: Option<u16>,
},
/// Launch interactive configuration wizard
#[command(alias = "config")]
Wizard {
/// Dry run mode - show changes without writing files
#[arg(long)]
dry_run: bool,
},
/// Quick stats and health check for namespaces
///
/// Shows chunk count, date range, top topics, and storage info.
///
/// Examples:
/// rust-memex overview # All namespaces
/// rust-memex overview memories # Specific namespace
Overview {
/// Namespace to get overview for (optional, shows all if not specified)
namespace: Option<String>,
/// Output as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// Deep exploration with all details - drill into onion layers
///
/// Shows ALL onion layers (outer/middle/inner/core), both BM25 and vector scores,
/// full metadata, and related chunks.
///
/// Examples:
/// rust-memex dive -n memories -q "dragon"
/// rust-memex dive -n memories -q "dragon" --verbose
Dive {
/// Namespace to search in
#[arg(long, short = 'n', required = true)]
namespace: String,
/// Search query text
#[arg(long, short = 'q', required = true)]
query: String,
/// Maximum number of results per layer
#[arg(long, short = 'l', default_value = "5")]
limit: usize,
/// Show extra verbose output (full text, all metadata)
#[arg(long, short = 'v')]
verbose: bool,
/// Output as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// Batch index documents into vector store
Index {
/// Path to file or directory to index
#[arg(value_name = "PATH", required_unless_present = "source")]
path: Option<PathBuf>,
/// Path to file or directory to index
#[arg(long, value_name = "PATH", conflicts_with = "path")]
source: Option<PathBuf>,
/// Namespace for indexed documents (default: "rag")
#[arg(long, short = 'n')]
namespace: Option<String>,
/// Recursively walk subdirectories
#[arg(long, short = 'r')]
recursive: bool,
/// Glob pattern to filter files (e.g. "*.md", "*.pdf")
#[arg(long, short = 'g')]
glob: Option<String>,
/// Maximum depth when walking directories (0 = unlimited)
#[arg(long, default_value = "0")]
max_depth: usize,
/// Enable preprocessing to filter noise (tool artifacts, CLI output)
/// before indexing. Reduces vector storage size and improves search quality.
/// Note: timestamps are preserved by default; use --sanitize-metadata to remove them.
#[arg(long, short = 'p')]
preprocess: bool,
/// Sanitize timestamps, UUIDs, and session IDs from content.
/// By default, these are preserved for temporal queries.
/// Use this flag when you want to anonymize or normalize the data.
#[arg(long)]
sanitize_metadata: bool,
/// Slicing mode for document chunking:
/// - "onion" (default): Hierarchical slices (outer/middle/inner/core) for efficient context
/// - "onion-fast" / "fast": Only outer+core layers (2x faster, good for large datasets)
/// - "flat": Traditional fixed-size chunks with overlap
#[arg(long, short = 's', default_value = "onion", value_parser = ["onion", "onion-fast", "fast", "flat"])]
slice_mode: String,
/// Chunk provider override. If omitted, rust-memex routes per namespace/path.
#[arg(long, value_enum)]
chunker: Option<ChunkerKind>,
/// Outer-layer synthesis strategy for onion modes (spec P3).
/// - "keyword" (default): TF-based keyword extraction. No I/O.
/// - "llm": Synthesize the outer layer via a local Ollama model. Requires --pipeline mode.
///
/// When set to "llm" the slicer POSTs each document to
/// `{ollama-endpoint}/api/generate` with `{ollama-model, stream: false}`
/// and replaces the keyword outer with the model's 1-3 sentence summary.
/// Failures (network, non-2xx, malformed JSON, empty completion) silently
/// fall back to the keyword outer so the pipeline never stalls.
///
/// Reachable only through --pipeline mode; the legacy non-pipeline path
/// always uses the keyword outer regardless of this flag, so passing
/// --outer-synthesis llm without --pipeline is rejected up-front.
#[arg(long, default_value = "keyword", value_parser = ["keyword", "llm"])]
outer_synthesis: String,
/// Ollama model name used when --outer-synthesis llm is set.
/// Spec P3 baseline: a small local model such as qwen2.5:3b or phi-3.5:mini
/// fits the 1-3 sentence summary budget cheaply.
#[arg(long, default_value = "qwen2.5:3b")]
ollama_model: String,
/// Ollama HTTP endpoint used when --outer-synthesis llm is set.
/// Defaults to a local Ollama daemon. Trailing slash is normalized.
#[arg(long, default_value = "http://localhost:11434")]
ollama_endpoint: String,
/// Enable exact-match deduplication (default: enabled).
/// Skips indexing files whose content already exists in the namespace.
/// Uses SHA256 hash of original content before any preprocessing.
#[arg(long, default_value = "true", action = clap::ArgAction::Set)]
dedup: bool,
/// Force re-indexing even when the source already exists in the namespace
/// (spec P4 escape hatch).
///
/// Equivalent to passing `--dedup false` but more explicit at the call
/// site: this is the operator-visible knob for "I know this source is
/// already indexed, re-embed it anyway." Use cases per spec P4: force
/// reindex after a slicer change, debug a specific document, or
/// rebuild a layer that was partially purged.
///
/// Takes precedence over `--dedup` when set, so callers do not need
/// to pass both flags. The skip-log line for already-indexed sources
/// stays at `info!` level so an operator can tell from the run log
/// whether dedup was active and which sources were collapsed.
#[arg(long)]
allow_duplicates: bool,
/// Exit non-zero if any file failed to index
#[arg(long)]
strict: bool,
/// Exit non-zero if failure rate exceeds threshold (0.0-1.0)
#[arg(long, default_value = "1.0")]
max_failure_rate: f64,
/// Emit JSON summary on stdout as the last line
#[arg(long)]
json: bool,
/// Show progress bar with ETA when running in an interactive terminal.
/// Non-interactive runs fall back to line logs.
#[arg(long)]
progress: bool,
/// Resume from last checkpoint if interrupted.
/// Saves progress after each committed file to .index-checkpoint-<namespace>.json.
/// On restart, skips already indexed files and continues.
#[arg(long)]
resume: bool,
/// Enable async pipeline mode for concurrent indexing.
/// Runs file reading, chunking, embedding, and storage in parallel
/// using tokio channels. Can significantly speed up large batch operations.
/// Supports live progress output and commit-based resume checkpoints.
#[arg(long)]
pipeline: bool,
/// Maximum number of embedding requests to keep in flight in pipeline mode.
/// With --pipeline-governor disabled this is a fixed concurrency limit.
/// With --pipeline-governor enabled this becomes the governor's ceiling.
#[arg(long, default_value = "1", value_parser = clap::value_parser!(u8).range(1..=8))]
pipeline_embed_concurrency: u8,
/// Enable adaptive pipeline flow control for embedding batch sizes and concurrency.
/// Uses embed latency and queue pressure to increase slowly and back off quickly.
#[arg(long)]
pipeline_governor: bool,
/// Number of files to process in parallel (default: 4, max: 16).
/// Higher values can speed up indexing on multi-core systems,
/// but may increase memory usage and API pressure.
/// Note: This is ignored when --pipeline is enabled.
#[arg(long, short = 'P', default_value = "4", value_parser = clap::value_parser!(u8).range(1..=16))]
parallel: u8,
},
/// Smart semantic search within a namespace
///
/// Finds relevant information using vector similarity search with intelligent
/// defaults. Results include relevance scores, timestamps, and metadata.
///
/// Examples:
/// rust-memex search -n memories -q "when did we buy dragon"
/// rust-memex search -n memories -q "dragon" --deep
/// rust-memex search -n memories -q "dragon" -l 20
/// rust-memex search -n memories -q "dragon" --mode hybrid
Search {
/// Namespace to search in
#[arg(long, short = 'n', required = true)]
namespace: String,
/// Search query text
#[arg(long, short = 'q', required = true)]
query: String,
/// Maximum number of results to return (default: 10)
#[arg(long, short = 'l', default_value = "10")]
limit: usize,
/// Output results as JSON instead of human-readable format
#[arg(long)]
json: bool,
/// Deep search: include all layers (outer/middle/inner/core) instead of just outer
#[arg(long)]
deep: bool,
/// Filter by specific layer (outer, middle, inner, core)
#[arg(long, value_parser = ["outer", "middle", "inner", "core"])]
layer: Option<String>,
/// Search mode: vector (similarity only), keyword/bm25 (lexical only), or hybrid (default)
/// Hybrid combines vector and BM25 using score fusion for best results.
#[arg(long, short = 'm', default_value = "hybrid", value_parser = ["vector", "keyword", "bm25", "hybrid"])]
mode: String,
/// Auto-detect query intent and select optimal search mode.
/// Overrides --mode when enabled. Uses QueryRouter to analyze query.
#[arg(long)]
auto_route: bool,
/// Show relevance scores prominently (enabled by default)
#[arg(long, default_value = "true", action = clap::ArgAction::Set)]
scores: bool,
},
/// Expand a slice to get its children (drill down in onion hierarchy)
Expand {
/// Namespace containing the slice
#[arg(long, short = 'n', required = true)]
namespace: String,
/// Slice ID to expand
#[arg(long, short = 'i', required = true)]
id: String,
/// Output results as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// Get a specific chunk by namespace and ID
Get {
/// Namespace containing the chunk
#[arg(long, short = 'n', required = true)]
namespace: String,
/// Chunk ID to retrieve
#[arg(long, short = 'i', required = true)]
id: String,
/// Output result as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// RAG search across all namespaces or a specific one
RagSearch {
/// Search query text
#[arg(long, short = 'q', required = true)]
query: String,
/// Maximum number of results to return
#[arg(long, short = 'l', default_value = "10")]
limit: usize,
/// Optional namespace to limit search to
#[arg(long, short = 'n')]
namespace: Option<String>,
/// Output results as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// List all namespaces with optional statistics
Namespaces {
/// Show statistics (document count, etc.)
#[arg(long, short = 's')]
stats: bool,
/// Output as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// Export a namespace to JSONL file for portable backup
///
/// Each document is written as a JSON line with: id, text, metadata, content_hash,
/// and optionally embeddings. Use with 'import' command for backup/restore.
///
/// Examples:
/// rust-memex export -n memories -o backup.jsonl
/// rust-memex export -n memories --include-embeddings -o full-backup.jsonl
Export {
/// Namespace to export
#[arg(long, short = 'n', required = true)]
namespace: String,
/// Output file path (.jsonl format, stdout if not specified)
#[arg(long, short = 'o')]
output: Option<PathBuf>,
/// Include vector embeddings in export (makes files much larger)
#[arg(long)]
include_embeddings: bool,
/// Database path override
#[arg(long)]
db_path: Option<String>,
},
/// Upsert a text chunk directly into vector memory (for hooks/scripts)
Upsert {
/// Namespace for the chunk
#[arg(long, short = 'n', required = true)]
namespace: String,
/// Unique ID for the chunk
#[arg(long, short = 'i', required = true)]
id: String,
/// Text content (if not provided, reads from stdin)
#[arg(long, short = 't')]
text: Option<String>,
/// Optional metadata as JSON string
#[arg(long, short = 'm', default_value = "{}")]
metadata: String,
},
/// Optimize database: compact files and cleanup old versions
///
/// Runs both compaction (merge small files) and pruning (remove old versions).
/// Use this after large indexing operations to improve query performance
/// and reduce file descriptor usage.
Optimize,
/// Show database health status and recommendations
///
/// Checks database connectivity, embedder availability, namespace stats,
/// and provides maintenance recommendations.
///
/// Examples:
/// rust-memex health # Full health check
/// rust-memex health --quick # Skip embedder check (faster)
/// rust-memex health --json # JSON output for scripting
Health {
/// Skip embedder connectivity check (faster, DB-only)
#[arg(long, short = 'q')]
quick: bool,
/// Output as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// Recall memories about a topic with synthesized summary
///
/// Searches your memories and presents results as a coherent summary,
/// using the onion slice architecture (outer layers = summaries).
///
/// Examples:
/// rust-memex recall "Vista architecture" # Search all namespaces
/// rust-memex recall "dragon setup" -n memories # Specific namespace
/// rust-memex recall "auth flow" --limit 20 # More sources
Recall {
/// What to recall (search query)
query: String,
/// Limit to specific namespace (default: search all)
#[arg(long, short = 'n')]
namespace: Option<String>,
/// Maximum number of sources to consider (default: 10)
#[arg(long, short = 'l', default_value = "10")]
limit: usize,
/// Output as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// Show timeline of indexed content
///
/// Displays when documents were indexed, grouped by month.
/// Useful for understanding temporal coverage of your memory.
///
/// Examples:
/// rust-memex timeline # All namespaces
/// rust-memex timeline -n memories # Specific namespace
/// rust-memex timeline -n memories --since 30d # Last 30 days
/// rust-memex timeline --gaps # Show only gaps
Timeline {
/// Filter to specific namespace (default: all namespaces)
#[arg(long, short = 'n')]
namespace: Option<String>,
/// Show entries since this time (e.g., "30d", "2025-01", "2024-12-01")
#[arg(long)]
since: Option<String>,
/// Only show gaps in the timeline (days with no indexed content)
#[arg(long)]
gaps: bool,
/// Output as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// Compact database files into larger chunks
///
/// Merges small data files into larger ones for better read performance.
/// Run this after many small inserts to reduce "too many open files" errors.
Compact,
/// Cleanup old database versions
///
/// Removes old versions of the data that are no longer needed.
/// By default, keeps versions from the last 7 days.
Cleanup {
/// Remove versions older than N days (default: 7)
#[arg(long, default_value = "7")]
older_than_days: u64,
},
/// Show database statistics
///
/// Displays row count, version count, and storage information.
Stats,
/// Garbage collection: clean up orphaned data
///
/// Removes orphan embeddings, empty namespaces, and old documents.
/// Always runs in dry-run mode unless you pass the --execute flag.
///
/// Examples:
/// rust-memex gc --remove-orphans # Dry run: show orphans
/// rust-memex gc --remove-orphans --execute # Actually remove orphans
/// rust-memex gc --older-than 90d # Dry run: docs older than 90 days
/// rust-memex gc --older-than 6m --namespace logs # Only in 'logs' namespace
/// rust-memex gc --remove-orphans --remove-empty --older-than 1y --execute
Gc {
/// Remove orphan embeddings (documents with parent_id pointing to non-existent documents)
#[arg(long)]
remove_orphans: bool,
/// Remove empty namespaces (report namespaces with 0 documents)
#[arg(long)]
remove_empty: bool,
/// Remove documents older than this duration (e.g., "30d", "6m", "1y")
#[arg(long)]
older_than: Option<String>,
/// Actually execute the cleanup (default is dry-run mode)
#[arg(long)]
execute: bool,
/// Limit to specific namespace (optional, applies to all if not set)
#[arg(long, short = 'n')]
namespace: Option<String>,
/// Output results as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// Inspect or repair pending Lance/BM25 recovery ledgers
///
/// This is the explicit recovery contract for partial cross-store writes.
/// It does not claim crash-safe atomicity. Instead it inspects persisted
/// batch ledgers, reports divergence, and can replay BM25 writes to match
/// current Lance truth.
///
/// Examples:
/// rust-memex repair-writes
/// rust-memex repair-writes --execute
/// rust-memex repair-writes -n memories --json
RepairWrites {
/// Limit inspection/repair to a single namespace
#[arg(long, short = 'n')]
namespace: Option<String>,
/// Actually execute reconciliation. Default is dry-run/report-only.
#[arg(long)]
execute: bool,
/// Output results as JSON instead of human-readable text
#[arg(long)]
json: bool,
},
/// Search across all namespaces
///
/// Performs a unified search across every namespace, merging and ranking results.
///
/// Examples:
/// rust-memex cross-search "error handling"
/// rust-memex cross-search "config" --mode hybrid --limit 5 --total-limit 20
/// rust-memex cross-search "memory leak" --json
CrossSearch {
/// The search query
query: String,
/// Maximum results per namespace (default: 10)
#[arg(long, default_value = "10")]
limit: usize,
/// Maximum total results after merging (default: 50)
#[arg(long, default_value = "50")]
total_limit: usize,
/// Search mode: vector, bm25/keyword, or hybrid (default: hybrid)
#[arg(long, default_value = "hybrid")]
mode: String,
/// Output results as JSON
#[arg(long)]
json: bool,
},
/// Merge multiple LanceDB databases into one with deduplication
///
/// Combines documents from multiple source databases into a single target database.
/// Useful for consolidating memory across machines or instances.
///
/// Examples:
/// rust-memex merge --source ~/db1 --source ~/db2 --target ~/merged
/// rust-memex merge --source ~/db1 --source ~/db2 --target ~/merged --dedup
/// rust-memex merge --source ~/dragon-db --target ~/merged --namespace-prefix "dragon:"
/// rust-memex merge --source ~/db1 --target ~/merged --dry-run
Merge {
/// Source database paths (can specify multiple times)
#[arg(long, short = 's', required = true, action = clap::ArgAction::Append)]
source: Vec<PathBuf>,
/// Target database path (will be created if not exists)
#[arg(long, short = 't', required = true)]
target: PathBuf,
/// Deduplicate by content_hash (skip documents with same hash)
#[arg(long, short = 'd')]
dedup: bool,
/// Prefix to add to source namespaces (e.g., "dragon:" -> "dragon:memories")
#[arg(long, short = 'p')]
namespace_prefix: Option<String>,
/// Show what would be merged without actually doing it
#[arg(long)]
dry_run: bool,
/// Output results as JSON
#[arg(long)]
json: bool,
},
/// Find and remove duplicate documents based on the chosen grouping key
///
/// Groups documents per --group-by and removes duplicates, keeping one
/// document per group based on the --keep strategy. The default
/// `source-hash-layer` grouping preserves the onion structure: for each
/// source document it keeps exactly one chunk per layer, removing only
/// real repeats (e.g. `__dupe__` + `__clean__` variants of the same
/// transcript). Use `content-hash` only when you actually want byte-
/// identical chunk text grouping (legacy v3 behavior).
///
/// Examples:
/// rust-memex dedup # All namespaces, dry-run
/// rust-memex dedup -n kb:transcripts # Specific namespace
/// rust-memex dedup --dry-run false # Actually remove duplicates
/// rust-memex dedup --keep newest # Keep newest duplicates
/// rust-memex dedup --cross-namespace # Dedup across all namespaces
/// rust-memex dedup --group-by content-hash # Legacy per-chunk hash
/// rust-memex dedup --group-by source-hash # Collapse all layers per source
Dedup {
/// Specific namespace to deduplicate (if not set, processes all namespaces separately)
#[arg(long, short = 'n')]
namespace: Option<String>,
/// Show duplicates without removing them (default: true)
#[arg(long, default_value = "true", action = clap::ArgAction::Set)]
dry_run: bool,
/// Strategy for which document to keep when duplicates are found:
/// - "oldest": Keep the document with the earliest ID (lexicographic, default)
/// - "newest": Keep the document with the latest ID (lexicographic)
/// - "highest-score": Keep the document that appears first in vector search
#[arg(long, default_value = "oldest", value_parser = ["oldest", "newest", "highest-score"])]
keep: String,
/// Deduplicate across all namespaces (treat entire DB as one pool).
/// By default, deduplication is done within each namespace separately.
#[arg(long)]
cross_namespace: bool,
/// How to bucket chunks into duplicate groups.
/// - "source-hash-layer" (default): keeps onion intact, removes only true source repeats
/// - "source-hash": collapses all layers of a source into one group
/// - "content-hash": legacy per-chunk text hash (post-v4 finds nothing on fresh indexes)
#[arg(
long = "group-by",
default_value = "source-hash-layer",
value_parser = ["source-hash-layer", "source-hash", "content-hash"]
)]
group_by: String,
/// Output as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// Migrate or rename a namespace
///
/// Moves all documents from one namespace to another. Useful for renaming
/// namespaces or consolidating data.
///
/// Examples:
/// rust-memex migrate-namespace --from old-name --to new-name
/// rust-memex migrate-namespace --from old --to new --merge
/// rust-memex migrate-namespace --from old --to new --dry-run
/// rust-memex migrate-namespace --from old --to new --delete-source false
#[command(alias = "mv-namespace")]
MigrateNamespace {
/// Source namespace name
#[arg(long, required = true)]
from: String,
/// Target namespace name
#[arg(long, required = true)]
to: String,
/// If target namespace exists, merge documents instead of erroring
#[arg(long)]
merge: bool,
/// Delete source namespace after migration (default: true)
#[arg(long, default_value = "true", action = clap::ArgAction::Set)]
delete_source: bool,
/// Show what would happen without making changes
#[arg(long)]
dry_run: bool,
/// Output results as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// Delete all documents in a namespace (DESTRUCTIVE)
///
/// Permanently removes all chunks from the specified namespace.
/// This action cannot be undone - use with caution!
///
/// Examples:
/// rust-memex purge-namespace -n garbage
/// rust-memex purge-namespace -n old-data --confirm
#[command(alias = "purge")]
PurgeNamespace {
/// Namespace to purge
#[arg(long, short = 'n', required = true)]
namespace: String,
/// Skip confirmation prompt (use with caution!)
#[arg(long)]
confirm: bool,
/// Output results as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// Import documents from JSONL file into a namespace
///
/// Reads documents exported with 'export' command and stores them.
/// Can re-embed text if embeddings were not included in export.
///
/// Examples:
/// rust-memex import -n memories -i backup.jsonl
/// rust-memex import -n new-namespace -i backup.jsonl --skip-existing
Import {
/// Target namespace (can differ from original export)
#[arg(long, short = 'n', required = true)]
namespace: String,
/// Input JSONL file path
#[arg(long, short = 'i', required = true)]
input: PathBuf,
/// Skip documents whose content_hash already exists in target namespace
#[arg(long)]
skip_existing: bool,
/// Database path override
#[arg(long)]
db_path: Option<String>,
},
/// Reprocess exported JSONL into a fresh namespace using the current chunker
///
/// Useful when the original source files are gone but the namespace export is valuable.
/// The command collapses onion families back to a single canonical document, optionally
/// preprocesses the text, and re-indexes it with the requested slice mode.
///
/// Examples:
/// rust-memex export -n kodowanie -o kodowanie.jsonl
/// rust-memex reprocess -i kodowanie.jsonl -n kodowanie-v2 --slice-mode onion-fast
/// rust-memex reprocess -i memories.jsonl -n memories-v2 --preprocess --dry-run
#[command(alias = "reindex-export")]
Reprocess {
/// Target namespace for rebuilt documents
#[arg(long, short = 'n', required = true)]
namespace: String,
/// Input JSONL file produced by 'export'
#[arg(long, short = 'i', required = true)]
input: PathBuf,
/// Slice mode for the rebuilt namespace
#[arg(long, short = 's', default_value = "onion", value_parser = ["onion", "onion-fast", "fast", "flat"])]
slice_mode: String,
/// Chunk provider override. If omitted, rust-memex routes by source/namespace heuristic.
#[arg(long, value_enum)]
chunker: Option<ChunkerKind>,
/// Apply preprocessing before rebuilding documents
#[arg(long)]
preprocess: bool,
/// Skip documents already rebuilt with the same source hash
#[arg(long)]
skip_existing: bool,
/// Force rebuilding even when source_hash already exists in the target namespace
#[arg(long)]
allow_duplicates: bool,
/// Exit non-zero if any document failed to reprocess
#[arg(long)]
strict: bool,
/// Exit non-zero if failure rate exceeds threshold (0.0-1.0)
#[arg(long, default_value = "1.0")]
max_failure_rate: f64,
/// Emit JSON summary on stdout as the last line
#[arg(long)]
json: bool,
/// Show what would be rebuilt without writing anything
#[arg(long)]
dry_run: bool,
/// Database path override
#[arg(long)]
db_path: Option<String>,
},
/// Reindex an existing rust-memex namespace into '<namespace>-reindexed'
///
/// This is the in-database equivalent of 'export -> reprocess' for namespaced
/// rust-memex stores. It reads the existing namespace, collapses onion families
/// back to canonical documents, and writes a rebuilt namespace without touching
/// the source data.
///
/// Examples:
/// rust-memex reindex -n kodowanie
/// rust-memex reindex -n kodowanie --dry-run
/// rust-memex reindex -n kodowanie --target-namespace kodowanie-v2 --slice-mode onion-fast
// The flag set below deliberately mirrors `Reprocess` (minus `input`, plus
// `target_namespace`) so operators can switch between the two workflows.
Reindex {
/// Source namespace to rebuild
#[arg(long, short = 'n', required = true)]
namespace: String,
/// Target namespace override (default: '<namespace>-reindexed')
#[arg(long)]
target_namespace: Option<String>,
/// Slice mode for the rebuilt namespace
#[arg(long, short = 's', default_value = "onion", value_parser = ["onion", "onion-fast", "fast", "flat"])]
slice_mode: String,
/// Chunk provider override. If omitted, rust-memex routes by source/namespace heuristic.
#[arg(long, value_enum)]
chunker: Option<ChunkerKind>,
/// Apply preprocessing before rebuilding documents
#[arg(long)]
preprocess: bool,
/// Skip documents already rebuilt with the same source hash
#[arg(long)]
skip_existing: bool,
/// Force rebuilding even when source_hash already exists in the target namespace
#[arg(long)]
allow_duplicates: bool,
/// Exit non-zero if any document failed to reindex
#[arg(long)]
strict: bool,
/// Exit non-zero if failure rate exceeds threshold (0.0-1.0)
// With the documented 0.0-1.0 range, the 1.0 default can never be exceeded,
// so this gate is effectively off unless the operator lowers it.
#[arg(long, default_value = "1.0")]
max_failure_rate: f64,
/// Emit JSON summary on stdout as the last line
#[arg(long)]
json: bool,
/// Show what would be rebuilt without writing anything
#[arg(long)]
dry_run: bool,
/// Database path override
#[arg(long)]
db_path: Option<String>,
},
/// Audit database quality and text integrity
///
/// Analyzes namespaces for embedding quality, text integrity (>90% target),
/// and provides recommendations for cleanup.
///
/// Examples:
/// rust-memex audit # Audit all namespaces
/// rust-memex audit -n memories # Audit specific namespace
/// rust-memex audit --threshold 85 # Custom quality threshold
/// rust-memex audit --json # JSON output for scripting
#[command(alias = "quality")]
Audit {
/// Specific namespace to audit (default: all namespaces)
#[arg(long, short = 'n')]
namespace: Option<String>,
/// Minimum quality threshold (0-100, default: 90)
// NOTE(review): u8 accepts values above 100; presumably the handler clamps
// or rejects them -- confirm, or add a clap value_parser range.
#[arg(long, default_value = "90")]
threshold: u8,
/// Show detailed metrics for each chunk (verbose)
#[arg(long, short = 'v')]
verbose: bool,
/// Output results as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// Purge low-quality namespaces based on audit results
///
/// Removes namespaces that fall below the quality threshold.
/// Always runs in dry-run mode unless --confirm is passed.
///
/// Examples:
/// rust-memex purge-quality # Dry run with 90% threshold
/// rust-memex purge-quality --threshold 80 # Lower threshold
/// rust-memex purge-quality --confirm # Actually delete
// Destructive command: note the safety inversion vs. other commands -- there
// is no --dry-run flag; dry-run is the default and --confirm opts into writes.
#[command(alias = "purge-low-quality")]
PurgeQuality {
/// Minimum quality threshold (0-100, default: 90)
#[arg(long, default_value = "90")]
threshold: u8,
/// Actually delete namespaces (default: dry-run)
#[arg(long)]
confirm: bool,
/// Output results as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// Migrate the LanceDB table schema to the target binary contract
///
/// Adds missing nullable columns in-place without rewriting rows. This closes
/// the pre-v4 -> v4 chicken-and-egg where `backfill-hashes` needs the
/// `source_hash` column before it can repair legacy hash data.
///
/// Examples:
/// rust-memex migrate-schema --check-only
/// rust-memex migrate-schema --db-path /path/to/lancedb
/// rust-memex migrate-schema --target v4
MigrateSchema {
/// Target schema version (default: v4)
// The real default tracks `SchemaVersion::current()`; the "(default: v4)"
// doc line above will go stale when current() advances past v4.
#[arg(long, default_value_t = SchemaVersion::current())]
target: SchemaVersion,
/// Report whether migration is needed without changing the table.
/// Exits 1 when required columns are missing.
#[arg(long)]
check_only: bool,
},
/// Backfill per-chunk `content_hash` and `source_hash` for legacy chunks
///
/// Walks the namespace (or every namespace when `-n` is omitted) and
/// recomputes `content_hash = SHA256(chunk_text)` for every row, recovering
/// the legacy source-text hash into the new `source_hash` column. Idempotent:
/// rows that already match the v4 contract are counted as "consistent" and
/// left alone. Pre-v4 chunks (single hash equal to source text) get the
/// hash promoted into `source_hash` so post-v4 dedup grouping works without
/// re-reading source files.
///
/// Defaults to `--dry-run true` so an operator can audit before writing.
///
/// Spec: `2026-04-27_kb-transcripts-onion-slicer-fix-spec.md`, P0 backfill.
///
/// Examples:
/// rust-memex backfill-hashes # All namespaces, dry-run
/// rust-memex backfill-hashes -n kb:transcripts # One namespace, dry-run
/// rust-memex backfill-hashes -n kb:transcripts --dry-run false # Actually write
/// rust-memex backfill-hashes --json # Machine-readable
BackfillHashes {
/// Specific namespace to backfill (default: every namespace)
#[arg(long, short = 'n')]
namespace: Option<String>,
/// Plan only, write nothing (default: true). Pass `--dry-run false`
/// to actually rewrite the rows. Mirrors the `dedup` CLI default so
/// an operator never accidentally rewrites a namespace.
// `ArgAction::Set` makes this a value-taking bool (`--dry-run false`)
// rather than a presence flag, which is what enables the safe default.
#[arg(long, default_value = "true", action = clap::ArgAction::Set)]
dry_run: bool,
/// Output as JSON instead of human-readable format
#[arg(long)]
json: bool,
/// Exit non-zero if any document could not be backfilled
#[arg(long)]
strict: bool,
/// Exit non-zero if failure rate exceeds threshold (0.0-1.0)
// With the documented 0.0-1.0 range, the 1.0 default can never be exceeded,
// so this gate is effectively off unless the operator lowers it.
#[arg(long, default_value = "1.0")]
max_failure_rate: f64,
},
/// Manage auth tokens with per-token scopes and namespace ACL
///
/// Create, list, revoke, and rotate bearer tokens for HTTP API access.
/// Each token is hashed with argon2id at rest. The plaintext is shown
/// ONCE on creation and can never be retrieved again.
///
/// Examples:
/// rust-memex auth create --description "iPhone" --scopes read,write --namespaces kb:claude,kb:mikserka
/// rust-memex auth list
/// rust-memex auth revoke --id monika-iphone
/// rust-memex auth rotate --id monika-iphone
Auth {
// Nested subcommand tree; the concrete actions live in `AuthAction` below.
#[command(subcommand)]
action: AuthAction,
},
}
/// Auth token management subcommands.
// Dispatched from `Commands::Auth { action }`. Per the parent command's docs,
// plaintext tokens are printed once (create/rotate) and stored hashed.
#[derive(Subcommand, Debug)]
pub enum AuthAction {
/// Create a new auth token
///
/// Generates a new token with specified scopes and namespace access.
/// The plaintext token is printed ONCE and never stored.
Create {
/// Human-readable token identifier (e.g., "monika-iphone")
// Optional: when omitted, the handler presumably generates an id -- confirm.
#[arg(long)]
id: Option<String>,
/// Description of what this token is for
#[arg(long, required = true)]
description: String,
/// Comma-separated scopes: read, write, admin
// Scope strings are parsed downstream; clap only carries the raw CSV here.
#[arg(long, default_value = "read,write")]
scopes: String,
/// Comma-separated namespace ACL. Use "*" for all namespaces.
#[arg(long, default_value = "*")]
namespaces: String,
/// Token expiry (RFC 3339 timestamp, e.g., "2026-12-31T00:00:00Z")
// NOTE(review): the RFC 3339 format is not validated by clap; presumably the
// handler rejects malformed timestamps -- confirm.
#[arg(long)]
expires_at: Option<String>,
/// Output as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// List all tokens (without revealing plaintext)
List {
/// Output as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
/// Revoke (delete) a token by its ID
Revoke {
/// Token ID to revoke
#[arg(long, required = true)]
id: String,
},
/// Rotate a token: revoke old, create new with same metadata
Rotate {
/// Token ID to rotate
#[arg(long, required = true)]
id: String,
/// Output as JSON instead of human-readable format
#[arg(long)]
json: bool,
},
}
impl Cli {
pub fn into_server_config(self) -> Result<ServerConfig> {
let (file_cfg, config_path) = load_or_discover_config(self.config.as_deref())?;
if let Some(ref path) = config_path {
eprintln!("Using config: {}", path);
}
let legacy_mode = self.mode.clone().or_else(|| file_cfg.mode.clone());
let legacy_features = self.features.clone().or_else(|| file_cfg.features.clone());
if legacy_mode.is_some() || legacy_features.is_some() {
eprintln!(
"Warning: legacy mode/features settings are ignored. rust-memex now exposes one canonical MCP surface; constrain access with --allowed-paths, HTTP auth, or namespace security instead."
);
}
// Extract embedding config first (before any moves from file_cfg)
let embeddings = file_cfg.resolve_embedding_config();
let default_cfg = ServerConfig::default();
let db_path = FileConfig::resolve_db_path(
self.db_path.as_deref(),
file_cfg.db_path.as_deref(),
config_path.is_some(),
false,
);
// Build security config from CLI and file settings
let security_enabled = self.security_enabled || file_cfg.security_enabled.unwrap_or(false);
let token_store_path = self.token_store_path.or(file_cfg.token_store_path);
// Derive BM25 index path as sibling of db_path:
// db_path = "~/.rmcp-servers/rmcp-memex/lancedb"
// bm25 = "~/.rmcp-servers/rmcp-memex/bm25"
let mut hybrid = default_cfg.hybrid;
let expanded_db = shellexpand::tilde(&db_path).to_string();
if let Some(parent) = std::path::Path::new(&expanded_db).parent() {
hybrid.bm25.index_path = parent.join("bm25").to_string_lossy().to_string();
}
Ok(ServerConfig {
cache_mb: self
.cache_mb
.or(file_cfg.cache_mb)
.unwrap_or(default_cfg.cache_mb),
db_path,
max_request_bytes: self
.max_request_bytes
.or(file_cfg.max_request_bytes)
.unwrap_or(default_cfg.max_request_bytes),
log_level: self
.log_level
.or(file_cfg.log_level)
.map(|s| parse_log_level(&s))
.unwrap_or(default_cfg.log_level),
allowed_paths: self
.allowed_paths
.or(file_cfg.allowed_paths)
.unwrap_or(default_cfg.allowed_paths),
security: NamespaceSecurityConfig {
enabled: security_enabled,
token_store_path,
},
embeddings,
hybrid,
})
}
}
pub fn parse_log_level(level: &str) -> Level {
match level.to_ascii_lowercase().as_str() {
"trace" => Level::TRACE,
"debug" => Level::DEBUG,
"info" => Level::INFO,
"warn" => Level::WARN,
"error" => Level::ERROR,
_ => Level::INFO,
}
}
/// Check if a path matches a glob pattern
pub fn matches_glob(path: &Path, pattern: &str) -> bool {
let file_name = match path.file_name().and_then(|n| n.to_str()) {
Some(n) => n,
None => return false,
};
glob::Pattern::new(pattern)
.map(|p| p.matches(file_name))
.unwrap_or(false)
}
/// Collect files to index based on path, recursion, and glob settings
pub fn collect_files(
path: &Path,
recursive: bool,
glob_pattern: Option<&str>,
max_depth: usize,
) -> Result<Vec<PathBuf>> {
let mut files = Vec::new();
if path.is_file() {
// Single file - check glob if provided
if let Some(pattern) = glob_pattern {
if matches_glob(path, pattern) {
files.push(path.to_path_buf());
}
} else {
files.push(path.to_path_buf());
}
return Ok(files);
}
// Directory walk
let mut walker = WalkDir::new(path);
if !recursive {
walker = walker.max_depth(1);
} else if max_depth > 0 {
walker = walker.max_depth(max_depth);
}
for entry in walker.into_iter().filter_map(|e| e.ok()) {
let entry_path = entry.path();
if !entry_path.is_file() {
continue;
}
// Filter by glob pattern if provided
if glob_pattern.is_some_and(|pattern| !matches_glob(entry_path, pattern)) {
continue;
}
files.push(entry_path.to_path_buf());
}
Ok(files)
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
// These tests pin the public CLI surface by driving the real clap parser via
// `Cli::parse_from` / `Cli::try_parse_from`: flag names, defaults, aliases,
// and value rejection are all contract, not implementation detail.
#[test]
fn legacy_mode_and_features_flags_parse_but_do_not_change_server_shape() {
let tmp = tempdir().unwrap();
let config_path = tmp.path().join("config.toml");
std::fs::write(
&config_path,
"mode = \"memory\"\nfeatures = \"memory,search\"\n",
)
.unwrap();
let cli = Cli::parse_from([
"rust-memex",
"--config",
config_path.to_str().unwrap(),
"--mode",
"full",
"--features",
"filesystem,memory,search",
"serve",
]);
// Legacy flags (CLI and file) must be accepted but leave the resolved
// ServerConfig identical to the defaults.
let config = cli.into_server_config().unwrap();
let defaults = ServerConfig::default();
assert_eq!(config.db_path, defaults.db_path);
assert_eq!(config.cache_mb, defaults.cache_mb);
assert_eq!(config.max_request_bytes, defaults.max_request_bytes);
assert_eq!(config.allowed_paths, defaults.allowed_paths);
}
#[test]
fn dashboard_command_parses_without_explicit_port() {
let cli = Cli::parse_from(["rust-memex", "dashboard"]);
match cli.command {
Some(Commands::Dashboard { port, no_open }) => {
assert_eq!(port, None);
assert!(!no_open);
}
other => panic!("expected dashboard command, got {:?}", other),
}
}
#[test]
fn sse_command_parses_without_explicit_port() {
let cli = Cli::parse_from(["rust-memex", "sse"]);
match cli.command {
Some(Commands::Sse { port }) => assert_eq!(port, None),
other => panic!("expected sse command, got {:?}", other),
}
}
#[test]
fn repair_writes_command_parses() {
let cli = Cli::parse_from(["rust-memex", "repair-writes", "--execute", "-n", "memories"]);
match cli.command {
Some(Commands::RepairWrites {
namespace,
execute,
json,
}) => {
assert_eq!(namespace.as_deref(), Some("memories"));
assert!(execute);
assert!(!json);
}
other => panic!("expected repair-writes command, got {:?}", other),
}
}
#[test]
fn auth_mode_flag_parses_all_routes() {
let cli = Cli::parse_from(["rust-memex", "--auth-mode", "all-routes", "serve"]);
assert_eq!(cli.auth_mode, "all-routes");
}
#[test]
fn auth_mode_defaults_to_mutating_only() {
let cli = Cli::parse_from(["rust-memex", "serve"]);
assert_eq!(cli.auth_mode, "mutating-only");
}
#[test]
fn allow_network_without_auth_parses() {
let cli = Cli::parse_from(["rust-memex", "--allow-network-without-auth", "serve"]);
assert!(cli.allow_network_without_auth);
}
#[test]
fn allow_query_token_parses() {
let cli = Cli::parse_from(["rust-memex", "--allow-query-token", "serve"]);
assert!(cli.allow_query_token);
}
#[test]
fn auth_mode_rejects_invalid_value() {
let result = Cli::try_parse_from(["rust-memex", "--auth-mode", "bogus", "serve"]);
assert!(result.is_err());
}
#[test]
fn index_command_outer_synthesis_defaults_to_keyword() {
let cli = Cli::parse_from(["rust-memex", "index", "/tmp"]);
match cli.command {
Some(Commands::Index {
outer_synthesis,
ollama_model,
ollama_endpoint,
..
}) => {
assert_eq!(outer_synthesis, "keyword");
// Defaults are still populated even on the keyword path so the
// CLI surface stays consistent; downstream parser ignores them.
assert_eq!(ollama_model, "qwen2.5:3b");
assert_eq!(ollama_endpoint, "http://localhost:11434");
}
other => panic!("expected index command, got {:?}", other),
}
}
#[test]
fn index_command_accepts_outer_synthesis_llm_with_overrides() {
let cli = Cli::parse_from([
"rust-memex",
"index",
"/tmp",
"--slice-mode",
"onion",
"--pipeline",
"--outer-synthesis",
"llm",
"--ollama-model",
"phi-3.5:mini",
"--ollama-endpoint",
"http://10.0.0.5:11434",
]);
match cli.command {
Some(Commands::Index {
outer_synthesis,
ollama_model,
ollama_endpoint,
pipeline,
slice_mode,
..
}) => {
assert_eq!(outer_synthesis, "llm");
assert_eq!(ollama_model, "phi-3.5:mini");
assert_eq!(ollama_endpoint, "http://10.0.0.5:11434");
assert!(pipeline);
assert_eq!(slice_mode, "onion");
}
other => panic!("expected index command, got {:?}", other),
}
}
#[test]
fn index_command_rejects_unknown_outer_synthesis() {
let result = Cli::try_parse_from([
"rust-memex",
"index",
"/tmp",
"--outer-synthesis",
"transformers",
]);
assert!(
result.is_err(),
"clap must reject unknown --outer-synthesis values up-front"
);
}
// -------------------------------------------------------------------------
// Spec P4: --allow-duplicates escape hatch
// -------------------------------------------------------------------------
#[test]
fn index_command_allow_duplicates_defaults_to_false() {
let cli = Cli::parse_from(["rust-memex", "index", "/tmp"]);
match cli.command {
Some(Commands::Index {
allow_duplicates,
dedup,
..
}) => {
assert!(
!allow_duplicates,
"default must be false so the safe path (dedup-on) is the default"
);
assert!(
dedup,
"dedup default must remain true; allow-duplicates is the explicit override"
);
}
other => panic!("expected index command, got {:?}", other),
}
}
#[test]
fn index_command_accepts_allow_duplicates_flag() {
let cli = Cli::parse_from(["rust-memex", "index", "/tmp", "--allow-duplicates"]);
match cli.command {
Some(Commands::Index {
allow_duplicates,
dedup,
..
}) => {
assert!(allow_duplicates);
// The flag itself does not flip --dedup at parse time; the
// dispatcher applies the precedence at run time so the user
// sees a "Note: ..." breadcrumb when both flags are set.
assert!(dedup);
}
other => panic!("expected index command, got {:?}", other),
}
}
#[test]
fn reprocess_command_accepts_allow_duplicates_flag() {
let cli = Cli::parse_from([
"rust-memex",
"reprocess",
"-n",
"kb:rebuilt",
"-i",
"/tmp/export.jsonl",
"--allow-duplicates",
]);
match cli.command {
Some(Commands::Reprocess {
allow_duplicates, ..
}) => {
assert!(allow_duplicates);
}
other => panic!("expected reprocess command, got {:?}", other),
}
}
#[test]
fn reindex_command_accepts_allow_duplicates_flag() {
let cli = Cli::parse_from([
"rust-memex",
"reindex",
"-n",
"kb:transcripts",
"--allow-duplicates",
]);
match cli.command {
Some(Commands::Reindex {
allow_duplicates, ..
}) => {
assert!(allow_duplicates);
}
other => panic!("expected reindex command, got {:?}", other),
}
}
// -------------------------------------------------------------------------
// Spec memex-001: `migrate-schema` CLI surface
// -------------------------------------------------------------------------
#[test]
fn migrate_schema_command_defaults_to_v4_live_run() {
let cli = Cli::parse_from(["rust-memex", "migrate-schema"]);
match cli.command {
Some(Commands::MigrateSchema { target, check_only }) => {
assert_eq!(target, SchemaVersion::V4);
assert!(!check_only);
}
other => panic!("expected migrate-schema, got {:?}", other),
}
}
#[test]
fn migrate_schema_command_accepts_check_only_and_target_alias() {
// "4" must parse as an alias spelling of SchemaVersion::V4.
let cli = Cli::parse_from([
"rust-memex",
"migrate-schema",
"--target",
"4",
"--check-only",
]);
match cli.command {
Some(Commands::MigrateSchema { target, check_only }) => {
assert_eq!(target, SchemaVersion::V4);
assert!(check_only);
}
other => panic!("expected migrate-schema, got {:?}", other),
}
}
#[test]
fn auto_migrate_defaults_off_and_is_global_for_daemon_modes() {
// --auto-migrate is a global flag: accepted both before and after the
// subcommand, for every daemon mode.
let cli = Cli::parse_from(["rust-memex", "sse"]);
assert!(!cli.auto_migrate);
let cli = Cli::parse_from(["rust-memex", "--auto-migrate", "sse"]);
assert!(cli.auto_migrate);
let cli = Cli::parse_from(["rust-memex", "dashboard", "--auto-migrate"]);
assert!(cli.auto_migrate);
}
// -------------------------------------------------------------------------
// Spec P0 backfill: `backfill-hashes` CLI surface
// -------------------------------------------------------------------------
#[test]
fn backfill_hashes_command_defaults_to_dry_run_all_namespaces() {
let cli = Cli::parse_from(["rust-memex", "backfill-hashes"]);
match cli.command {
Some(Commands::BackfillHashes {
namespace,
dry_run,
json,
..
}) => {
assert!(namespace.is_none(), "no -n means all namespaces");
assert!(dry_run, "default must be dry-run for safety");
assert!(!json);
}
other => panic!("expected backfill-hashes, got {:?}", other),
}
}
#[test]
fn backfill_hashes_command_accepts_namespace_and_live_run() {
let cli = Cli::parse_from([
"rust-memex",
"backfill-hashes",
"-n",
"kb:transcripts",
"--dry-run",
"false",
"--json",
]);
match cli.command {
Some(Commands::BackfillHashes {
namespace,
dry_run,
json,
..
}) => {
assert_eq!(namespace.as_deref(), Some("kb:transcripts"));
assert!(!dry_run, "operator opted into a live write");
assert!(json);
}
other => panic!("expected backfill-hashes, got {:?}", other),
}
}
}