use anyhow::{Context, Result, bail};
use clap::{Args, CommandFactory, Parser, Subcommand, ValueEnum};
use serde_json::{Value, json};
use sifs::{
CacheConfig, EncoderSpec, IndexOptions, IndexStats, ModelLoadPolicy, ModelOptions, SearchMode,
SearchOptions, SearchResult, SifsIndex, cache_summary, format_results, is_git_url,
load_model_with_options, model_status, platform_cache_root, resolve_chunk,
};
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command as ProcessCommand;
use std::time::Instant;
// Top-level command-line definition parsed by clap.
// NOTE: plain `//` comments are used on purpose; clap derive turns `///` doc
// comments into help text, which would change the CLI output.
#[derive(Parser)]
#[command(
name = "sifs",
about = "SIFS Is Fast Search: instant local code search for agents."
)]
struct Cli {
// Selected subcommand. `None` when the binary is run without one, in which
// case `main` prints the generated help text.
#[command(subcommand)]
command: Option<Command>,
}
// All top-level `sifs` subcommands. Each variant is destructured and dispatched
// in `main`. Plain `//` comments are used here because clap derive would treat
// `///` doc comments as help text.
#[derive(Subcommand)]
enum Command {
// `sifs search <query> [path]`: handled by `run_search`.
#[command(about = "Search a local directory or Git URL with natural-language or code queries.")]
Search {
#[arg(help = "Natural-language, code, symbol, or literal query to search for.")]
query: String,
#[arg(
default_value = ".",
help = "Local directory or Git URL to index and search."
)]
path: String,
#[arg(
short = 'k',
long = "top-k",
default_value_t = 5,
help = "Maximum number of ranked chunks to print."
)]
top_k: usize,
#[arg(short = 'm', long = "mode", value_enum, default_value_t = ModeArg::Hybrid, help = "Ranking mode: hybrid for most searches, bm25 for exact symbols, semantic for conceptual queries.")]
mode: ModeArg,
// Copied into `SearchOptions::filter_languages` by `run_search`.
#[arg(
long = "language",
help = "Only search chunks with this language label. Repeatable."
)]
languages: Vec<String>,
// Exposed as `--path` (distinct from the positional `path` source above);
// copied into `SearchOptions::filter_paths` by `run_search`.
#[arg(
long = "path",
help = "Only search this repository-relative file path. Repeatable."
)]
filter_paths: Vec<String>,
// Only consumed by the JSON/JSONL result builders in this file.
#[arg(
long,
default_value_t = 0,
help = "Include surrounding lines when local source files are available."
)]
context_lines: usize,
// Only affects text output: `run_search` prints the explanation line
// before the results when neither --json nor --jsonl is set.
#[arg(
long,
help = "Include query, ranking, and filter metadata in the output."
)]
explain: bool,
#[command(flatten)]
output: OutputArgs,
#[arg(long, help = "Model path or Hugging Face model id.")]
model: Option<String>,
#[arg(long, value_enum, default_value_t = EncoderArg::Model2Vec, help = "Encoder for semantic and hybrid search.")]
encoder: EncoderArg,
// `offline` takes precedence over `no_download` in `model_policy`.
#[arg(long, help = "Disable model downloads and remote Git sources.")]
offline: bool,
#[arg(long = "no-download", help = "Disable model downloads.")]
no_download: bool,
// The three cache flags below are folded into one `CacheConfig` by
// `cache_config` (precedence: no_cache, project_cache, cache_dir).
#[arg(long, help = "Use a custom persistent index cache directory.")]
cache_dir: Option<PathBuf>,
#[arg(long, help = "Disable persistent index caches.")]
no_cache: bool,
#[arg(long, help = "Use a project-local .sifs cache.")]
project_cache: bool,
},
// `sifs find-related <file> <line> [path]`: handled inline in `main`.
#[command(about = "Find chunks related to a known file and one-based line number.")]
FindRelated {
#[arg(help = "Repository-relative file path, usually copied from a search result.")]
file_path: String,
#[arg(help = "One-based line number inside the source chunk.")]
line: usize,
#[arg(
default_value = ".",
help = "Local directory or Git URL to index and search."
)]
path: String,
#[arg(
short = 'k',
long = "top-k",
default_value_t = 5,
help = "Maximum number of related chunks to print."
)]
top_k: usize,
#[command(flatten)]
output: OutputArgs,
#[arg(long, help = "Model path or Hugging Face model id.")]
model: Option<String>,
#[arg(long, value_enum, default_value_t = EncoderArg::Model2Vec, help = "Encoder for related-code search.")]
encoder: EncoderArg,
#[arg(long, help = "Disable model downloads and remote Git sources.")]
offline: bool,
#[arg(long = "no-download", help = "Disable model downloads.")]
no_download: bool,
#[arg(long, help = "Use a custom persistent index cache directory.")]
cache_dir: Option<PathBuf>,
#[arg(long, help = "Disable persistent index caches.")]
no_cache: bool,
#[arg(long, help = "Use a project-local .sifs cache.")]
project_cache: bool,
},
// `sifs mcp [install|doctor]`: without a nested subcommand `main` starts the
// stdio server using the flags declared directly on this variant; the nested
// `install`/`doctor` subcommands carry their own copies of these flags.
#[command(about = "Start, install, or inspect the SIFS stdio MCP server.")]
Mcp {
#[command(subcommand)]
command: Option<McpCommand>,
#[arg(help = "Local directory or git URL to pre-index.")]
path: Option<String>,
#[arg(long = "ref", help = "Branch or tag to check out for a Git URL.")]
ref_name: Option<String>,
#[arg(long, help = "Model path or Hugging Face model id.")]
model: Option<String>,
#[arg(long, help = "Disable model downloads and remote Git sources.")]
offline: bool,
#[arg(long = "no-download", help = "Disable model downloads.")]
no_download: bool,
#[arg(long, help = "Use a custom persistent index cache directory.")]
cache_dir: Option<PathBuf>,
#[arg(long, help = "Disable persistent index caches.")]
no_cache: bool,
#[arg(long, help = "Use a project-local .sifs cache.")]
project_cache: bool,
},
// `sifs files [path]`: handled by `run_files`. No cache flags are exposed;
// the handler always uses `CacheConfig::Platform`.
#[command(about = "List repository-relative file paths included in the index.")]
Files {
#[arg(
default_value = ".",
help = "Local directory or Git URL to index and inspect."
)]
path: String,
#[arg(
long,
default_value_t = 200,
help = "Maximum number of file paths to print."
)]
limit: usize,
#[command(flatten)]
output: OutputArgs,
#[arg(long, help = "Model path or Hugging Face model id.")]
model: Option<String>,
#[arg(long, help = "Disable model downloads and remote Git sources.")]
offline: bool,
#[arg(long = "no-download", help = "Disable model downloads.")]
no_download: bool,
},
// `sifs status [path]`: handled by `run_status`. Uses a bare `--json` flag
// instead of the shared `OutputArgs`, and always uses `CacheConfig::Platform`.
#[command(about = "Print index status for a local directory or Git URL.")]
Status {
#[arg(
default_value = ".",
help = "Local directory or Git URL to index and inspect."
)]
path: String,
#[arg(long, help = "Print structured JSON output.")]
json: bool,
#[arg(long, help = "Model path or Hugging Face model id.")]
model: Option<String>,
#[arg(long, help = "Disable model downloads and remote Git sources.")]
offline: bool,
#[arg(long = "no-download", help = "Disable model downloads.")]
no_download: bool,
},
// `sifs get <file> <line> [path]`: handled by `run_get`; always uses
// `CacheConfig::Platform`.
#[command(about = "Print the indexed chunk containing a file and one-based line number.")]
Get {
#[arg(help = "Repository-relative file path.")]
file_path: String,
#[arg(help = "One-based line number inside the source chunk.")]
line: usize,
#[arg(
default_value = ".",
help = "Local directory or Git URL to index and inspect."
)]
path: String,
#[command(flatten)]
output: OutputArgs,
#[arg(long, help = "Model path or Hugging Face model id.")]
model: Option<String>,
#[arg(long, help = "Disable model downloads and remote Git sources.")]
offline: bool,
#[arg(long = "no-download", help = "Disable model downloads.")]
no_download: bool,
},
// `sifs clean [path]`: handled by `run_clean`, which deletes `<path>/.sifs`.
#[command(about = "Remove SIFS cache artifacts from a local directory.")]
Clean {
#[arg(
default_value = ".",
help = "Local directory whose .sifs cache should be removed."
)]
path: String,
},
// `sifs cache <status|clean>`: handled by `run_cache`.
#[command(about = "Inspect or clean SIFS persistent index caches.")]
Cache {
#[command(subcommand)]
command: CacheCommand,
},
// `sifs doctor [path]`: handled by `run_doctor`, which only prints a report.
#[command(about = "Check SIFS model, cache, and offline readiness.")]
Doctor {
#[arg(long, help = "Model path or Hugging Face model id.")]
model: Option<String>,
#[arg(long, value_enum, default_value_t = EncoderArg::Model2Vec)]
encoder: EncoderArg,
#[arg(
default_value = ".",
help = "Local directory to inspect for cache readiness."
)]
path: String,
#[arg(long, help = "Disable model downloads and remote Git sources.")]
offline: bool,
#[arg(long = "no-download", help = "Disable model downloads.")]
no_download: bool,
},
// `sifs model <pull|fetch|status>`: handled by `run_model`.
#[command(about = "Manage the SIFS embedding model cache.")]
Model {
#[command(subcommand)]
command: ModelCommand,
},
// `sifs init`: handled by `run_init`.
#[command(about = "Create the Claude agent file at .claude/agents/sifs-search.md.")]
Init {
#[arg(long, help = "Overwrite an existing generated agent file.")]
force: bool,
},
// `sifs capabilities`: handled by `print_capabilities`.
#[command(about = "Print SIFS agent, CLI, and MCP capabilities.")]
Capabilities,
}
// Nested subcommands of `sifs mcp`. When none is given, `main` starts the
// stdio MCP server instead. Plain `//` comments are used because clap derive
// would treat `///` doc comments as help text.
#[derive(Subcommand)]
enum McpCommand {
// `sifs mcp install`: fields are copied into `McpInstallOptions` and handled
// by `run_mcp_install`.
#[command(about = "Configure SIFS as a local stdio MCP server for Codex and/or Claude Code.")]
Install {
#[arg(long, value_enum, default_value_t = McpClientArg::All)]
client: McpClientArg,
// Forwarded to the Claude installer only; it falls back to
// `McpScopeArg::Local` when omitted.
#[arg(long, value_enum)]
scope: Option<McpScopeArg>,
#[arg(
long,
help = "Local directory or Git URL to expose through the MCP server."
)]
source: Option<String>,
#[arg(
long = "ref",
help = "Branch or tag to check out for a Git URL source."
)]
ref_name: Option<String>,
#[arg(long, default_value = "sifs", help = "MCP server name to configure.")]
name: String,
#[arg(long, help = "Disable model downloads and remote Git sources.")]
offline: bool,
#[arg(long = "no-download", help = "Disable model downloads.")]
no_download: bool,
#[arg(long, help = "Use a custom persistent index cache directory.")]
cache_dir: Option<PathBuf>,
#[arg(long, help = "Disable persistent index caches.")]
no_cache: bool,
#[arg(long, help = "Use a project-local .sifs cache.")]
project_cache: bool,
#[arg(long, help = "Replace an existing same-name MCP server.")]
force: bool,
#[arg(
long,
help = "Print commands and config without changing client state."
)]
dry_run: bool,
},
// `sifs mcp doctor [source]`: fields are copied into `McpDoctorOptions` and
// handled by `run_mcp_doctor`. Unlike `install`, `source` is positional.
#[command(about = "Inspect local MCP install readiness.")]
Doctor {
#[arg(
default_value = ".",
help = "Local directory or Git URL the MCP server would expose."
)]
source: String,
#[arg(
long = "ref",
help = "Branch or tag to check out for a Git URL source."
)]
ref_name: Option<String>,
#[arg(long, help = "Disable model downloads and remote Git sources.")]
offline: bool,
#[arg(long = "no-download", help = "Disable model downloads.")]
no_download: bool,
#[arg(long, help = "Use a custom persistent index cache directory.")]
cache_dir: Option<PathBuf>,
#[arg(long, help = "Disable persistent index caches.")]
no_cache: bool,
#[arg(long, help = "Use a project-local .sifs cache.")]
project_cache: bool,
},
}
// Subcommands of `sifs model`, handled by `run_model`. Plain `//` comments are
// used because clap derive would treat `///` doc comments as help text.
#[derive(Subcommand)]
enum ModelCommand {
// Loads the model with downloads allowed; `run_model` treats `Pull` and
// `Fetch` identically.
#[command(about = "Download or validate the embedding model in the Hugging Face cache.")]
Pull {
// Optional model override; `None` is passed through to `ModelOptions::new`.
#[arg(long)]
model: Option<String>,
},
#[command(about = "Alias for `pull`: download or validate the embedding model.")]
Fetch {
#[arg(long)]
model: Option<String>,
},
// Reports local availability without loading the model.
#[command(about = "Report whether the embedding model is available locally.")]
Status {
#[arg(long)]
model: Option<String>,
// Print the status as pretty JSON instead of a one-line message.
#[arg(long)]
json: bool,
},
}
// Subcommands of `sifs cache`; `main` forwards them to `run_cache` (defined
// outside this view). Plain `//` comments are used because clap derive would
// treat `///` doc comments as help text.
#[derive(Subcommand)]
enum CacheCommand {
#[command(about = "Report persistent index cache location and size.")]
Status {
// Optional cache directory override.
#[arg(long)]
cache_dir: Option<PathBuf>,
#[arg(long)]
json: bool,
},
#[command(about = "Delete the SIFS persistent index cache.")]
Clean {
// Optional cache directory override.
#[arg(long)]
cache_dir: Option<PathBuf>,
#[arg(long)]
dry_run: bool,
},
}
// CLI spelling of the ranking mode for `--mode`; converted one-to-one into
// `SearchMode` by the `From<ModeArg>` impl in this file.
#[derive(Clone, Copy, Debug, ValueEnum)]
enum ModeArg {
Hybrid,
Semantic,
Bm25,
}
// CLI spelling of the encoder for `--encoder`; `encoder_spec` maps it to an
// `EncoderSpec`.
#[derive(Clone, Copy, Debug, ValueEnum)]
enum EncoderArg {
// Explicit value name so the flag value is spelled `model2vec`.
#[value(name = "model2vec")]
Model2Vec,
// Maps to `EncoderSpec::hashing()`, which takes no model or load policy.
Hashing,
}
// Which MCP client(s) `sifs mcp install --client` should configure.
#[derive(Clone, Copy, Debug, ValueEnum)]
enum McpClientArg {
Codex,
Claude,
// Configures Codex first, then Claude Code (see `run_mcp_install`).
All,
}
// Value of `sifs mcp install --scope`; forwarded to the Claude installer,
// which defaults to `Local` when the flag is omitted.
#[derive(Clone, Copy, Debug, ValueEnum)]
enum McpScopeArg {
Local,
Project,
User,
}
// Text layout for `--format`, used when neither --json nor --jsonl is set.
#[derive(Clone, Copy, Debug, ValueEnum)]
enum TextFormat {
// Multi-line, human-oriented output.
Human,
// One line per item (tab-separated fields for search results and chunks).
Compact,
}
// Output flags shared (via `#[command(flatten)]`) by search, find-related,
// files, and get. Handlers check `json` first, then `jsonl`, and only then
// fall back to the text `format`.
#[derive(Args)]
struct OutputArgs {
// Declared mutually exclusive with --jsonl at parse time.
#[arg(long, conflicts_with = "jsonl", help = "Print one pretty JSON object.")]
json: bool,
#[arg(long, help = "Print newline-delimited JSON records.")]
jsonl: bool,
#[arg(long, value_enum, default_value_t = TextFormat::Human, help = "Text output format.")]
format: TextFormat,
}
/// Maps the CLI-facing ranking mode onto the library's [`SearchMode`], variant for variant.
impl From<ModeArg> for SearchMode {
    fn from(mode_arg: ModeArg) -> Self {
        match mode_arg {
            ModeArg::Bm25 => Self::Bm25,
            ModeArg::Semantic => Self::Semantic,
            ModeArg::Hybrid => Self::Hybrid,
        }
    }
}
/// Entry point: parses the command line and dispatches to the matching handler.
///
/// Most subcommands delegate to a `run_*` function; `find-related` and the bare
/// `mcp` server start are handled inline. Running without a subcommand prints
/// the generated help text.
///
/// # Errors
/// Propagates any error returned by the selected handler.
fn main() -> Result<()> {
let cli = Cli::parse();
match cli.command {
// search: bundle the parsed flags into a `SearchCommand`, converting the
// mode and collapsing the three cache flags into one `CacheConfig`.
Some(Command::Search {
query,
path,
top_k,
mode,
languages,
filter_paths,
context_lines,
explain,
output,
model,
encoder,
offline,
no_download,
cache_dir,
no_cache,
project_cache,
}) => run_search(SearchCommand {
query,
path,
top_k,
mode: SearchMode::from(mode),
languages,
filter_paths,
context_lines,
explain,
output,
model,
encoder,
offline,
no_download,
cache: cache_config(cache_dir, no_cache, project_cache),
})?,
// find-related: handled inline. Always builds via `build_hybrid_index`.
Some(Command::FindRelated {
file_path,
line,
path,
top_k,
output,
model,
encoder,
offline,
no_download,
cache_dir,
no_cache,
project_cache,
}) => {
let policy = model_policy(offline, no_download);
// Timer covers index construction plus the related-chunk lookup.
let started = Instant::now();
let index = build_hybrid_index(
&path,
encoder_spec(encoder, model.as_deref(), policy),
cache_config(cache_dir, no_cache, project_cache),
offline,
)?;
// No chunk at that location: report on stderr and exit with status 1
// directly instead of returning an error.
let Some(chunk) = resolve_chunk(&index.chunks, &file_path, line) else {
eprintln!("No chunk found at {file_path}:{line}.");
std::process::exit(1);
};
let results = index.find_related(&chunk, top_k)?;
let elapsed_ms = started.elapsed().as_millis();
// Context expansion is disabled here (context_lines = 0).
let payload = json!({
"source": path,
"file_path": file_path,
"line": line,
"top_k": top_k,
"index_stats": index.stats(),
"elapsed_ms": elapsed_ms,
"results": structured_results(&path, &results, 0),
});
if output.json {
println!("{}", serde_json::to_string_pretty(&payload)?);
} else if output.jsonl {
// One self-describing record per result; index stats are omitted.
for result in &results {
println!(
"{}",
serde_json::to_string(&json!({
"source": path,
"file_path": file_path,
"line": line,
"top_k": top_k,
"elapsed_ms": elapsed_ms,
"result": structured_result(&path, result, 0),
}))?
);
}
} else {
match output.format {
TextFormat::Human => {
if results.is_empty() {
println!("No related chunks found for {file_path}:{line}.");
} else {
println!(
"{}",
format_results(
&format!("Chunks related to {file_path}:{line}"),
&results
)
);
}
}
TextFormat::Compact => print_compact_results(&results),
}
}
}
Some(Command::Model { command }) => run_model(command)?,
Some(Command::Cache { command }) => run_cache(command)?,
// doctor: `offline` is passed both folded into the policy and on its own,
// because the report also states whether remote Git sources are blocked.
Some(Command::Doctor {
model,
encoder,
path,
offline,
no_download,
}) => run_doctor(
&path,
encoder,
model.as_deref(),
model_policy(offline, no_download),
offline,
)?,
// mcp: the flags bound here are only used by the `None` (serve) branch;
// the `Install`/`Doctor` patterns below bind same-named fields of their
// own, which shadow these inside their arms.
Some(Command::Mcp {
command,
path,
ref_name,
model,
offline,
no_download,
cache_dir,
no_cache,
project_cache,
}) => match command {
Some(McpCommand::Install {
client,
scope,
source,
ref_name,
name,
offline,
no_download,
cache_dir,
no_cache,
project_cache,
force,
dry_run,
}) => run_mcp_install(McpInstallOptions {
client,
scope,
source,
ref_name,
name,
offline,
no_download,
cache_dir,
no_cache,
project_cache,
force,
dry_run,
})?,
Some(McpCommand::Doctor {
source,
ref_name,
offline,
no_download,
cache_dir,
no_cache,
project_cache,
}) => run_mcp_doctor(McpDoctorOptions {
source,
ref_name,
offline,
no_download,
cache_dir,
no_cache,
project_cache,
})?,
// No nested subcommand: start the stdio MCP server.
None => {
let policy = model_policy(offline, no_download);
// Reject a Git URL up front when --offline is set.
if offline
&& let Some(path) = &path
&& is_git_url(path)
{
bail!("--offline does not allow remote Git sources");
}
sifs::mcp::serve_with_options(
path,
ref_name,
ModelOptions::new(model.as_deref(), policy),
cache_config(cache_dir, no_cache, project_cache),
offline,
)?;
}
},
Some(Command::Files {
path,
limit,
output,
model,
offline,
no_download,
}) => run_files(&path, limit, output, model, offline, no_download)?,
Some(Command::Status {
path,
json,
model,
offline,
no_download,
}) => run_status(&path, json, model, offline, no_download)?,
Some(Command::Get {
file_path,
line,
path,
output,
model,
offline,
no_download,
}) => run_get(&file_path, line, &path, output, model, offline, no_download)?,
Some(Command::Clean { path }) => run_clean(&path)?,
Some(Command::Init { force }) => run_init(force)?,
Some(Command::Capabilities) => print_capabilities(),
// No subcommand at all: print help followed by a newline.
None => {
Cli::command().print_help()?;
println!();
}
}
Ok(())
}
/// Fully resolved arguments for the `search` subcommand, assembled in `main`
/// from `Command::Search` and consumed by `run_search`.
struct SearchCommand {
/// Query text passed to the index search.
query: String,
/// Local directory or Git URL to index and search.
path: String,
/// Maximum number of results requested.
top_k: usize,
/// Ranking mode, already converted from the CLI `ModeArg`.
mode: SearchMode,
/// Language filters copied into `SearchOptions::filter_languages`.
languages: Vec<String>,
/// Path filters copied into `SearchOptions::filter_paths`.
filter_paths: Vec<String>,
/// Number of surrounding lines requested for JSON/JSONL result context.
context_lines: usize,
/// Whether to print the explanation line before text results.
explain: bool,
/// Output format selection (`--json`, `--jsonl`, `--format`).
output: OutputArgs,
/// Optional model path or id override.
model: Option<String>,
/// Encoder selection for non-BM25 modes.
encoder: EncoderArg,
/// `--offline`: also blocks remote Git sources.
offline: bool,
/// `--no-download`: blocks model downloads only.
no_download: bool,
/// Cache configuration resolved by `cache_config`.
cache: CacheConfig,
}
/// Runs the `search` subcommand: builds the index for the requested mode,
/// executes the query, and prints the results.
///
/// Output selection, in priority order:
/// 1. `--json`: one pretty-printed JSON payload including index stats.
/// 2. `--jsonl`: one JSON record per result.
/// 3. Text: optional `--explain` line, then human or compact results.
///
/// # Errors
/// Propagates index construction, search, and JSON serialization failures.
fn run_search(command: SearchCommand) -> Result<()> {
    let policy = model_policy(command.offline, command.no_download);
    // The timer covers index construction and the query itself.
    let started = Instant::now();
    let index = build_index_for_mode(
        &command.path,
        command.mode,
        encoder_spec(command.encoder, command.model.as_deref(), policy),
        command.cache,
        command.offline,
    )?;
    let mut options = SearchOptions::new(command.top_k).with_mode(command.mode);
    options.filter_languages = command.languages;
    options.filter_paths = command.filter_paths;
    let results = index.search_with(&command.query, &options)?;
    let elapsed_ms = started.elapsed().as_millis();
    let warnings = context_warnings(&command.path, command.context_lines);

    if command.output.json {
        // Building the aggregate payload expands context for every result
        // (reading source files from disk), so it is only constructed when it
        // is actually going to be printed.
        let payload = search_payload(
            &command.query,
            &command.path,
            &options,
            index.stats(),
            elapsed_ms,
            &warnings,
            &results,
            command.context_lines,
        );
        println!("{}", serde_json::to_string_pretty(&payload)?);
    } else if command.output.jsonl {
        for result in &results {
            println!(
                "{}",
                serde_json::to_string(&search_result_record(
                    &command.query,
                    &command.path,
                    &options,
                    elapsed_ms,
                    &warnings,
                    result,
                    command.context_lines,
                ))?
            );
        }
    } else {
        if command.explain {
            print_explanation(
                &command.query,
                &command.path,
                &options,
                elapsed_ms,
                &warnings,
            );
        }
        match command.output.format {
            TextFormat::Human => {
                if results.is_empty() {
                    println!("No results found.");
                } else {
                    println!(
                        "{}",
                        format_results(
                            &format!(
                                "Search results for: {:?} (mode={})",
                                command.query, command.mode
                            ),
                            &results,
                        )
                    );
                }
            }
            TextFormat::Compact => print_compact_results(&results),
        }
    }
    Ok(())
}
/// Builds the index variant required by `mode`.
///
/// BM25 searches use the sparse-only builder (the encoder spec is ignored);
/// semantic and hybrid searches use the builder that attaches `encoder_spec`.
fn build_index_for_mode(
    path: &str,
    mode: SearchMode,
    encoder_spec: EncoderSpec,
    cache: CacheConfig,
    offline: bool,
) -> Result<SifsIndex> {
    let needs_encoder = match mode {
        SearchMode::Bm25 => false,
        SearchMode::Semantic | SearchMode::Hybrid => true,
    };
    if needs_encoder {
        build_hybrid_index(path, encoder_spec, cache, offline)
    } else {
        build_sparse_index(path, cache, offline)
    }
}
/// Builds an index from sparse index options (no encoder attached) for a local
/// path or Git URL.
///
/// # Errors
/// Fails when `path` is a Git URL and `offline` is set, or when the underlying
/// index constructor fails.
fn build_sparse_index(path: &str, cache: CacheConfig, offline: bool) -> Result<SifsIndex> {
    let index_options = IndexOptions::sparse().with_cache(cache);
    // Local sources never need the network, so handle them first.
    if !is_git_url(path) {
        return SifsIndex::from_path_with_index_options(path, index_options);
    }
    if offline {
        bail!("--offline does not allow remote Git sources");
    }
    SifsIndex::from_git_with_index_options(path, None, index_options)
}
/// Builds an index with `encoder_spec` attached, for a local path or Git URL.
///
/// # Errors
/// Fails when `path` is a Git URL and `offline` is set, or when the underlying
/// index constructor fails.
fn build_hybrid_index(
    path: &str,
    encoder_spec: EncoderSpec,
    cache: CacheConfig,
    offline: bool,
) -> Result<SifsIndex> {
    let index_options = IndexOptions::sparse()
        .with_encoder_spec(encoder_spec)
        .with_cache(cache);
    // Local sources never need the network, so handle them first.
    if !is_git_url(path) {
        return SifsIndex::from_path_with_index_options(path, index_options);
    }
    if offline {
        bail!("--offline does not allow remote Git sources");
    }
    SifsIndex::from_git_with_index_options(path, None, index_options)
}
/// Collapses the three cache-related CLI flags into a single [`CacheConfig`].
///
/// Precedence: `no_cache` wins over `project_cache`, which wins over an explicit
/// `cache_dir`; with none of them set the platform cache is used.
fn cache_config(cache_dir: Option<PathBuf>, no_cache: bool, project_cache: bool) -> CacheConfig {
    match (no_cache, project_cache, cache_dir) {
        (true, _, _) => CacheConfig::Disabled,
        (false, true, _) => CacheConfig::Project,
        (false, false, Some(custom_dir)) => CacheConfig::Custom(custom_dir),
        (false, false, None) => CacheConfig::Platform,
    }
}
/// Runs the `files` subcommand: indexes `path` and prints up to `limit`
/// indexed file paths as pretty JSON, JSONL records, or plain text.
///
/// The index is always built with the model2vec encoder spec and the platform
/// cache.
///
/// # Errors
/// Propagates index construction and JSON serialization failures.
fn run_files(
    path: &str,
    limit: usize,
    output: OutputArgs,
    model: Option<String>,
    offline: bool,
    no_download: bool,
) -> Result<()> {
    let policy = model_policy(offline, no_download);
    let started = Instant::now();
    let spec = EncoderSpec::model2vec(model.as_deref(), policy);
    let index = build_hybrid_index(path, spec, CacheConfig::Platform, offline)?;
    let elapsed_ms = started.elapsed().as_millis();

    let files = index.indexed_files();
    let total = files.len();
    let shown: Vec<_> = files.iter().take(limit).cloned().collect();
    let payload = json!({
        "source": path,
        "total": total,
        "limit": limit,
        "elapsed_ms": elapsed_ms,
        "files": shown,
    });

    if output.json {
        println!("{}", serde_json::to_string_pretty(&payload)?);
        return Ok(());
    }
    if output.jsonl {
        // One record per listed file; timing is not repeated per record.
        for file in &shown {
            let record = json!({
                "source": path,
                "total": total,
                "limit": limit,
                "file_path": file,
            });
            println!("{}", serde_json::to_string(&record)?);
        }
        return Ok(());
    }

    // Text output: the human format adds a header line, compact prints paths only.
    if matches!(output.format, TextFormat::Human) {
        println!(
            "Indexed files for {path:?} (showing {} of {}):",
            shown.len(),
            total
        );
    }
    for file in shown {
        println!("{file}");
    }
    Ok(())
}
/// Runs the `status` subcommand: indexes `path` and reports index statistics
/// plus semantic-index availability, as pretty JSON or a one-line summary.
///
/// The index is always built with the model2vec encoder spec and the platform
/// cache.
///
/// NOTE(review): `semantic_index_available` inspects `<path>/.sifs`, while the
/// index here is built with `CacheConfig::Platform` — confirm the two are meant
/// to refer to the same cache location.
///
/// # Errors
/// Propagates index construction and JSON serialization failures.
fn run_status(
    path: &str,
    json_output: bool,
    model: Option<String>,
    offline: bool,
    no_download: bool,
) -> Result<()> {
    let policy = model_policy(offline, no_download);
    let started = Instant::now();
    let index = build_hybrid_index(
        path,
        EncoderSpec::model2vec(model.as_deref(), policy),
        CacheConfig::Platform,
        offline,
    )?;
    let elapsed_ms = started.elapsed().as_millis();
    let stats = index.stats();
    // Probe each fact exactly once: the availability check scans a directory
    // on disk, and a single probe cannot disagree with itself.
    let semantic_available = semantic_index_available(path);
    let semantic_loaded = index.semantic_loaded();

    if json_output {
        // The payload is only built when it is going to be printed.
        let payload = json!({
            "source": path,
            "index_stats": stats,
            "elapsed_ms": elapsed_ms,
            "semantic_index_available": semantic_available,
            "semantic_index_loaded": semantic_loaded,
        });
        println!("{}", serde_json::to_string_pretty(&payload)?);
    } else {
        println!(
            "Index status for {path:?}: {} files, {} chunks, languages: {}. Semantic index available: {}. Semantic index loaded: {}.",
            stats.indexed_files,
            stats.total_chunks,
            format_languages(&stats.languages),
            semantic_available,
            semantic_loaded
        );
    }
    Ok(())
}
/// Reports whether `<path>/.sifs` contains an entry whose name starts with
/// `semantic-v2-`.
///
/// Always `false` for Git URLs and when the directory cannot be read;
/// unreadable directory entries are skipped.
fn semantic_index_available(path: &str) -> bool {
    if is_git_url(path) {
        return false;
    }
    let Ok(entries) = fs::read_dir(Path::new(path).join(".sifs")) else {
        return false;
    };
    for entry in entries {
        let Ok(entry) = entry else {
            continue;
        };
        if entry
            .file_name()
            .to_string_lossy()
            .starts_with("semantic-v2-")
        {
            return true;
        }
    }
    false
}
/// Runs the `get` subcommand: prints the indexed chunk that contains
/// `file_path:line` as pretty JSON, a single JSON line, or text.
///
/// When no chunk matches, a hint is written to stderr and the process exits
/// with status 1.
///
/// # Errors
/// Propagates index construction and JSON serialization failures.
fn run_get(
    file_path: &str,
    line: usize,
    path: &str,
    output: OutputArgs,
    model: Option<String>,
    offline: bool,
    no_download: bool,
) -> Result<()> {
    let policy = model_policy(offline, no_download);
    let spec = EncoderSpec::model2vec(model.as_deref(), policy);
    let index = build_hybrid_index(path, spec, CacheConfig::Platform, offline)?;
    let Some(chunk) = resolve_chunk(&index.chunks, file_path, line) else {
        eprintln!("No chunk found at {file_path}:{line}. Use `sifs files` to check indexed paths.");
        std::process::exit(1);
    };
    let payload = json!({
        "source": path,
        "chunk": chunk,
    });
    // --json takes priority over --jsonl, which takes priority over --format.
    match (output.json, output.jsonl, output.format) {
        (true, _, _) => println!("{}", serde_json::to_string_pretty(&payload)?),
        (false, true, _) => println!("{}", serde_json::to_string(&payload)?),
        (false, false, TextFormat::Human) => {
            // Location line followed by the chunk in a fenced code block.
            println!(
                "{}\n\n```{}\n{}\n```",
                chunk.location(),
                chunk.language.clone().unwrap_or_default(),
                chunk.content
            );
        }
        (false, false, TextFormat::Compact) => {
            // Single tab-separated line: location, language, first content line.
            let first_line = chunk.content.lines().next().unwrap_or_default().trim();
            println!(
                "{}\t{}\t{}",
                chunk.location(),
                chunk.language.clone().unwrap_or_default(),
                first_line
            );
        }
    }
    Ok(())
}
/// Runs the `clean` subcommand: removes the `.sifs` directory under `path`,
/// or reports that none exists.
///
/// # Errors
/// Fails for Git URLs and when the directory cannot be removed.
fn run_clean(path: &str) -> Result<()> {
    if is_git_url(path) {
        bail!("clean only supports local directories");
    }
    let cache_dir = Path::new(path).join(".sifs");
    if !cache_dir.exists() {
        println!("No SIFS cache found at {}", cache_dir.display());
        return Ok(());
    }
    fs::remove_dir_all(&cache_dir)
        .with_context(|| format!("remove SIFS cache {}", cache_dir.display()))?;
    println!("Removed {}", cache_dir.display());
    Ok(())
}
fn search_payload(
query: &str,
source: &str,
options: &SearchOptions,
stats: IndexStats,
elapsed_ms: u128,
warnings: &[String],
results: &[SearchResult],
context_lines: usize,
) -> Value {
json!({
"query": query,
"mode": options.mode.to_string(),
"source": source,
"top_k": options.top_k,
"filter_languages": options.filter_languages,
"filter_paths": options.filter_paths,
"index_stats": stats,
"elapsed_ms": elapsed_ms,
"warnings": warnings,
"results": structured_results(source, results, context_lines),
})
}
fn search_result_record(
query: &str,
source: &str,
options: &SearchOptions,
elapsed_ms: u128,
warnings: &[String],
result: &SearchResult,
context_lines: usize,
) -> Value {
json!({
"query": query,
"mode": options.mode.to_string(),
"source": source,
"top_k": options.top_k,
"filter_languages": options.filter_languages,
"filter_paths": options.filter_paths,
"elapsed_ms": elapsed_ms,
"warnings": warnings,
"result": structured_result(source, result, context_lines),
})
}
fn structured_results(source: &str, results: &[SearchResult], context_lines: usize) -> Value {
json!(
results
.iter()
.map(|result| structured_result(source, result, context_lines))
.collect::<Vec<_>>()
)
}
/// Converts one search result into a JSON object with its location, language,
/// score, result source, and content.
///
/// When `context_lines` is non-zero and the surrounding lines can be read, a
/// `context` object is added as well.
fn structured_result(source: &str, result: &SearchResult, context_lines: usize) -> Value {
    let chunk = &result.chunk;
    let mut record = json!({
        "file_path": chunk.file_path,
        "start_line": chunk.start_line,
        "end_line": chunk.end_line,
        "language": chunk.language,
        "score": result.score,
        "source": result.source.to_string(),
        "content": chunk.content,
    });
    // Context expansion is opt-in; skip the file read entirely when not requested.
    if context_lines == 0 {
        return record;
    }
    if let Some(context) = expanded_context(
        source,
        &chunk.file_path,
        chunk.start_line,
        chunk.end_line,
        context_lines,
    ) {
        record["context"] = context;
    }
    record
}
/// Reads `file_path` under the local `source` directory and returns the chunk's
/// line range widened by `context_lines` on both sides.
///
/// Line numbers are one-based and inclusive. The widened range is clamped to
/// the file: it never starts before line 1 or ends after the last line.
///
/// Returns `None` when `source` is a Git URL, the file cannot be read as UTF-8
/// text, the file has no lines, or the chunk starts beyond the end of the file.
fn expanded_context(
    source: &str,
    file_path: &str,
    start_line: usize,
    end_line: usize,
    context_lines: usize,
) -> Option<Value> {
    if is_git_url(source) {
        return None;
    }
    let full_path = Path::new(source).join(file_path);
    let content = fs::read_to_string(full_path).ok()?;
    let lines: Vec<&str> = content.lines().collect();
    if lines.is_empty() {
        return None;
    }
    let context_start = start_line.saturating_sub(context_lines).max(1);
    // `context_lines` comes straight from the command line; saturate so a huge
    // value clamps to the end of the file instead of overflowing.
    let context_end = end_line.saturating_add(context_lines).min(lines.len());
    // `get` returns `None` for an inverted or out-of-range window.
    let text = lines.get(context_start - 1..context_end)?.join("\n");
    Some(json!({
        "start_line": context_start,
        "end_line": context_end,
        "content": text,
    }))
}
/// Returns the warnings to attach to search output: a single notice when
/// context expansion was requested for a Git URL source, otherwise nothing.
fn context_warnings(source: &str, context_lines: usize) -> Vec<String> {
    let mut warnings = Vec::new();
    if context_lines > 0 && is_git_url(source) {
        warnings.push(
            "context_lines is best-effort and is not expanded for Git URL sources in CLI mode"
                .to_owned(),
        );
    }
    warnings
}
/// Prints the `--explain` summary line (query, source, mode, limits, filters,
/// and timing) followed by one `Warning:` line per warning.
fn print_explanation(
    query: &str,
    source: &str,
    options: &SearchOptions,
    elapsed_ms: u128,
    warnings: &[String],
) {
    let language_filters = options.filter_languages.join(", ");
    let path_filters = options.filter_paths.join(", ");
    println!(
        "Query: {query:?}; source: {source}; mode: {}; top_k: {}; languages: [{}]; paths: [{}]; elapsed_ms: {elapsed_ms}",
        options.mode, options.top_k, language_filters, path_filters
    );
    warnings
        .iter()
        .for_each(|warning| println!("Warning: {warning}"));
}
/// Prints one tab-separated line per result: location, score (four decimal
/// places), result source, and the trimmed first line of the chunk content.
fn print_compact_results(results: &[SearchResult]) {
    results.iter().for_each(|result| {
        // Empty content yields an empty preview rather than an error.
        let preview = result
            .chunk
            .content
            .lines()
            .next()
            .unwrap_or_default()
            .trim();
        println!(
            "{}\t{:.4}\t{}\t{}",
            result.chunk.location(),
            result.score,
            result.source,
            preview
        );
    });
}
/// Renders per-language chunk counts as `lang=count` pairs joined by `", "`,
/// in the map's key order, or `"none"` when the map is empty.
fn format_languages(languages: &std::collections::BTreeMap<String, usize>) -> String {
    if languages.is_empty() {
        return "none".to_owned();
    }
    let mut pairs = Vec::with_capacity(languages.len());
    for (language, count) in languages {
        pairs.push(format!("{language}={count}"));
    }
    pairs.join(", ")
}
/// Derives the model load policy from the CLI flags.
///
/// `offline` takes precedence over `no_download`; with neither set, downloads
/// are allowed.
fn model_policy(offline: bool, no_download: bool) -> ModelLoadPolicy {
    match (offline, no_download) {
        (true, _) => ModelLoadPolicy::Offline,
        (false, true) => ModelLoadPolicy::NoDownload,
        (false, false) => ModelLoadPolicy::AllowDownload,
    }
}
/// Translates the CLI encoder choice into an [`EncoderSpec`].
///
/// `model` and `policy` only apply to the model2vec encoder; the hashing
/// encoder takes neither.
fn encoder_spec(encoder: EncoderArg, model: Option<&str>, policy: ModelLoadPolicy) -> EncoderSpec {
    match encoder {
        EncoderArg::Hashing => EncoderSpec::hashing(),
        EncoderArg::Model2Vec => EncoderSpec::model2vec(model, policy),
    }
}
/// Runs the `model` subcommands.
///
/// `pull` and `fetch` load the model with downloads allowed and confirm it is
/// available. `status` reports local availability, either as pretty JSON or as
/// a one-line message.
///
/// # Errors
/// Propagates model loading and JSON serialization failures.
fn run_model(command: ModelCommand) -> Result<()> {
    let (model, as_json) = match command {
        ModelCommand::Pull { model } | ModelCommand::Fetch { model } => {
            let options = ModelOptions::new(model.as_deref(), ModelLoadPolicy::AllowDownload);
            load_model_with_options(&options)?;
            println!("Model is available: {}", options.model);
            return Ok(());
        }
        ModelCommand::Status { model, json } => (model, json),
    };

    let status = model_status(model.as_deref());
    if as_json {
        let report = serde_json::json!({
            "model": status.model,
            "available": status.available(),
            "tokenizer": status.tokenizer,
            "safetensors": status.safetensors,
            "config": status.config,
        });
        println!("{}", serde_json::to_string_pretty(&report)?);
        return Ok(());
    }
    if status.available() {
        println!("Model is available locally: {}", status.model);
    } else {
        println!("Model is not available locally: {}", status.model);
    }
    Ok(())
}
/// Runs the `doctor` subcommand: prints a readiness report covering the source
/// (remote Git URL or local directory), the project-local `.sifs` cache
/// directory, and the selected encoder's model files.
///
/// This only inspects state and prints; in the visible code it never returns
/// an error.
fn run_doctor(
    path: &str,
    encoder: EncoderArg,
    model: Option<&str>,
    policy: ModelLoadPolicy,
    offline: bool,
) -> Result<()> {
    println!("SIFS doctor");
    println!("Path: {path}");
    if is_git_url(path) {
        let remote_access = if offline { "blocked" } else { "allowed" };
        println!("Source: remote Git URL");
        println!("Remote Git under offline mode: {}", remote_access);
    } else {
        let local_dir = PathBuf::from(path);
        println!("Source: local directory");
        println!("Path exists: {}", local_dir.exists());
        println!("Path is directory: {}", local_dir.is_dir());
        if local_dir.is_dir() {
            let cache_dir = local_dir.join(".sifs");
            // Probe the cache directory itself when it exists; otherwise probe
            // the parent directory it would be created in.
            let probe = if cache_dir.exists() {
                &cache_dir
            } else {
                &local_dir
            };
            let cache_writable = match fs::metadata(probe) {
                Ok(metadata) => !metadata.permissions().readonly(),
                Err(_) => false,
            };
            println!("Cache directory: {}", cache_dir.display());
            println!("Cache writable: {cache_writable}");
        }
    }

    match encoder {
        EncoderArg::Hashing => {
            println!("Encoder: hashing");
            println!("Semantic readiness: ready without model files");
        }
        EncoderArg::Model2Vec => {
            let options = ModelOptions::new(model, policy);
            let status = model_status(Some(&options.model));
            let readiness = if status.available() {
                "ready from local files"
            } else if policy == ModelLoadPolicy::AllowDownload {
                "will download on first semantic/hybrid search"
            } else {
                "not ready in offline/no-download mode"
            };
            println!("Encoder: model2vec");
            println!("Model: {}", status.model);
            println!("Tokenizer: {}", file_status(status.tokenizer.as_ref()));
            println!("Safetensors: {}", file_status(status.safetensors.as_ref()));
            println!("Config: {}", file_status(status.config.as_ref()));
            println!("Semantic readiness: {}", readiness);
        }
    }
    Ok(())
}
/// Describes an optional file location for the doctor report:
/// `"found at <path>"` when present, `"missing"` otherwise.
fn file_status(path: Option<&PathBuf>) -> String {
    match path {
        Some(location) => format!("found at {}", location.display()),
        None => "missing".to_owned(),
    }
}
/// Arguments of `sifs mcp install`, copied field-for-field from
/// `McpCommand::Install` in `main` and consumed by `run_mcp_install`.
struct McpInstallOptions {
/// Which client(s) to configure.
client: McpClientArg,
/// Scope forwarded to the Claude installer; `None` means it picks its default.
scope: Option<McpScopeArg>,
/// Source to expose; resolved through `resolve_mcp_source`.
source: Option<String>,
/// Branch or tag for a Git URL source.
ref_name: Option<String>,
/// MCP server name to register.
name: String,
/// `--offline` flag, stored in the generated `McpConfig`.
offline: bool,
/// `--no-download` flag, stored in the generated `McpConfig`.
no_download: bool,
/// `--cache-dir` value, stored in the generated `McpConfig`.
cache_dir: Option<PathBuf>,
/// `--no-cache` flag, stored in the generated `McpConfig`.
no_cache: bool,
/// `--project-cache` flag, stored in the generated `McpConfig`.
project_cache: bool,
/// Replace an existing server with the same name instead of failing.
force: bool,
/// Print commands and fallback config without changing client state.
dry_run: bool,
}
/// Arguments of `sifs mcp doctor`, copied field-for-field from
/// `McpCommand::Doctor` in `main` and consumed by `run_mcp_doctor`.
struct McpDoctorOptions {
/// Source the MCP server would expose; resolved through `resolve_mcp_source`.
source: String,
/// Branch or tag for a Git URL source.
ref_name: Option<String>,
/// `--offline` flag, stored in the inspected `McpConfig`.
offline: bool,
/// `--no-download` flag, stored in the inspected `McpConfig`.
no_download: bool,
/// `--cache-dir` value, stored in the inspected `McpConfig`.
cache_dir: Option<PathBuf>,
/// `--no-cache` flag, stored in the inspected `McpConfig`.
no_cache: bool,
/// `--project-cache` flag, stored in the inspected `McpConfig`.
project_cache: bool,
}
/// Resolved description of the MCP server to launch; built by
/// `run_mcp_install` and `run_mcp_doctor` and passed to the helpers that
/// render client commands and fallback configuration.
struct McpConfig {
/// Path of the running `sifs` executable (from `std::env::current_exe`).
sifs_path: PathBuf,
/// Source returned by `resolve_mcp_source`.
source: String,
/// Branch or tag for a Git URL source.
ref_name: Option<String>,
/// `--offline` flag as given on the command line.
offline: bool,
/// `--no-download` flag as given on the command line.
no_download: bool,
/// `--cache-dir` value as given on the command line.
cache_dir: Option<PathBuf>,
/// `--no-cache` flag as given on the command line.
no_cache: bool,
/// `--project-cache` flag as given on the command line.
project_cache: bool,
}
fn run_mcp_install(options: McpInstallOptions) -> Result<()> {
let source = resolve_mcp_source(options.source.as_deref(), options.offline)?;
let config = McpConfig {
sifs_path: std::env::current_exe().context("resolve current sifs executable")?,
source,
ref_name: options.ref_name,
offline: options.offline,
no_download: options.no_download,
cache_dir: options.cache_dir,
no_cache: options.no_cache,
project_cache: options.project_cache,
};
warn_if_development_binary(&config.sifs_path);
if !options.dry_run && stable_binary_path(&config.sifs_path) == "no" {
bail!(
"refusing to write durable MCP config for development binary {}. Install with `cargo install --locked sifs` or Homebrew, then rerun this command.",
config.sifs_path.display()
);
}
match options.client {
McpClientArg::Codex => {
install_codex(&options.name, &config, options.force, options.dry_run)?
}
McpClientArg::Claude => install_claude(
&options.name,
options.scope,
&config,
options.force,
options.dry_run,
)?,
McpClientArg::All => {
install_codex(&options.name, &config, options.force, options.dry_run)?;
install_claude(
&options.name,
options.scope,
&config,
options.force,
options.dry_run,
)?;
}
}
Ok(())
}
/// Runs `sifs mcp doctor`: prints the executable path, install-path stability,
/// client CLI availability, and the MCP command line, then — for local sources
/// only — runs a BM25 search through the current binary as a smoke test.
///
/// A failing smoke test is reported in the output, not returned as an error.
///
/// # Errors
/// Fails when the source or the current executable path cannot be resolved.
fn run_mcp_doctor(options: McpDoctorOptions) -> Result<()> {
    let source = resolve_mcp_source(Some(&options.source), options.offline)?;
    let sifs_path = std::env::current_exe().context("resolve current sifs executable")?;
    let config = McpConfig {
        sifs_path,
        source,
        ref_name: options.ref_name,
        offline: options.offline,
        no_download: options.no_download,
        cache_dir: options.cache_dir,
        no_cache: options.no_cache,
        project_cache: options.project_cache,
    };

    println!("SIFS MCP doctor");
    println!("SIFS executable: {}", config.sifs_path.display());
    println!(
        "Stable install path: {}",
        stable_binary_path(&config.sifs_path)
    );
    println!("Codex CLI: {}", command_status("codex"));
    println!("Claude CLI: {}", command_status("claude"));
    println!(
        "MCP command: {}",
        display_command(&mcp_server_command(&config))
    );

    if is_git_url(&config.source) {
        println!("BM25 smoke: skipped for Git URL source");
        return Ok(());
    }

    // The smoke search runs offline and without caches.
    let smoke_args = [
        "search",
        "sifs_mcp_smoke",
        &config.source,
        "--mode",
        "bm25",
        "--offline",
        "--no-cache",
    ];
    match ProcessCommand::new(&config.sifs_path)
        .args(smoke_args)
        .output()
    {
        Ok(output) => {
            if output.status.success() {
                println!("BM25 smoke: passed");
            } else {
                println!(
                    "BM25 smoke: failed ({})",
                    String::from_utf8_lossy(&output.stderr).trim()
                );
            }
        }
        Err(error) => println!("BM25 smoke: could not run ({error})"),
    }
    Ok(())
}
/// Configures the SIFS MCP server for Codex.
///
/// Always prints the `codex` command line. A dry run also prints the
/// `config.toml` fallback and stops. If `codex` is not on `PATH`, the fallback
/// is printed with instructions instead. Otherwise an existing server of the
/// same name is removed first (only with `force`) and the server is added.
///
/// # Errors
/// Fails when a same-name server exists and `force` is not set, or when a
/// `codex` invocation fails.
fn install_codex(name: &str, config: &McpConfig, force: bool, dry_run: bool) -> Result<()> {
    let add_args = codex_add_args(name, config);
    let preview = display_command(&prepend_command("codex", &add_args));
    println!("Codex MCP:");
    println!("  {}", preview);

    if dry_run {
        println!(
            "\nCodex config.toml fallback:\n{}",
            codex_toml(name, config)
        );
        return Ok(());
    }
    if command_path("codex").is_none() {
        println!("codex was not found on PATH. Add this to ~/.codex/config.toml instead:");
        println!("{}", codex_toml(name, config));
        return Ok(());
    }

    let already_configured = mcp_server_exists("codex", name, None)?;
    if already_configured && !force {
        bail!("Codex MCP server {name:?} already exists. Re-run with --force to replace it.");
    }
    if already_configured {
        // Replace semantics: remove the old entry before adding the new one.
        run_checked("codex", &["mcp", "remove", name])?;
    }
    run_checked_owned("codex", &add_args)?;
    println!("Configured Codex MCP server {name:?}.");
    Ok(())
}
/// Registers the SIFS MCP server with Claude Code, or prints how to do it.
///
/// `scope` defaults to `McpScopeArg::Local` when not given. The
/// `claude mcp add-json` command is always printed first. Then:
/// * with `dry_run`, prints the `.mcp.json` fallback and stops;
/// * when `claude` is not on `PATH`, project scope is rejected with an error,
///   while other scopes print the fallback JSON and stop;
/// * otherwise adds the server, first removing an existing entry of the same
///   name (at the same scope) when `force` is set.
///
/// # Errors
///
/// Fails when the JSON payload cannot be built, when `claude` is missing for
/// project scope, when the server already exists and `force` is not set, or
/// when a spawned `claude` command cannot be run or exits unsuccessfully.
fn install_claude(
    name: &str,
    scope: Option<McpScopeArg>,
    config: &McpConfig,
    force: bool,
    dry_run: bool,
) -> Result<()> {
    let scope = scope.unwrap_or(McpScopeArg::Local);
    let add_args = claude_add_args(name, scope, config)?;
    let preview = display_command(&prepend_command("claude", &add_args));
    println!("Claude Code MCP:");
    println!(" {}", preview);

    if dry_run {
        let fallback = claude_project_json(name, config)?;
        println!("\nClaude .mcp.json fallback:\n{}", fallback);
        return Ok(());
    }

    if command_path("claude").is_none() {
        if let McpScopeArg::Project = scope {
            bail!(
                "claude was not found on PATH. Project-scoped .mcp.json should be written through Claude Code; install Claude or use --dry-run to print the fallback config."
            );
        }
        println!(
            "claude was not found on PATH. Add this project config only in trusted repositories:"
        );
        println!("{}", claude_project_json(name, config)?);
        return Ok(());
    }

    let already_registered = mcp_server_exists("claude", name, Some(scope))?;
    match (already_registered, force) {
        (false, _) => {}
        (true, false) => {
            bail!("Claude MCP server {name:?} already exists. Re-run with --force to replace it.")
        }
        (true, true) => {
            // Remove the entry at the same scope the new one will be added to.
            let remove_args: Vec<String> = ["mcp", "remove", name, "--scope", scope_arg(scope)]
                .iter()
                .map(|part| (*part).to_owned())
                .collect();
            run_checked_owned("claude", &remove_args)?;
        }
    }

    run_checked_owned("claude", &add_args)?;
    println!(
        "Configured Claude Code MCP server {name:?} at {} scope.",
        scope_arg(scope)
    );
    Ok(())
}
fn resolve_mcp_source(source: Option<&str>, offline: bool) -> Result<String> {
let source = match source {
Some(source) => source.to_owned(),
None => std::env::current_dir()
.context("resolve current directory")?
.to_string_lossy()
.into_owned(),
};
if is_git_url(&source) {
if offline {
bail!("--offline does not allow remote Git sources");
}
return Ok(source);
}
let path = PathBuf::from(source);
if !path.exists() {
bail!("local MCP source does not exist: {}", path.display());
}
if !path.is_dir() {
bail!("local MCP source is not a directory: {}", path.display());
}
Ok(path
.canonicalize()
.with_context(|| format!("canonicalize MCP source {}", path.display()))?
.to_string_lossy()
.into_owned())
}
/// Builds the arguments that follow the executable when launching the MCP
/// server: `mcp <source>` plus one flag per option enabled in `config`.
///
/// Flags are emitted in a fixed order: `--ref`, `--offline`, `--no-download`,
/// `--cache-dir`, `--no-cache`, `--project-cache`.
fn mcp_args(config: &McpConfig) -> Vec<String> {
    let mut argv: Vec<String> = Vec::new();
    argv.push(String::from("mcp"));
    argv.push(config.source.clone());

    if let Some(ref_name) = config.ref_name.as_ref() {
        argv.extend([String::from("--ref"), ref_name.clone()]);
    }
    for (enabled, flag) in [
        (config.offline, "--offline"),
        (config.no_download, "--no-download"),
    ] {
        if enabled {
            argv.push(String::from(flag));
        }
    }
    if let Some(cache_dir) = config.cache_dir.as_ref() {
        argv.extend([
            String::from("--cache-dir"),
            cache_dir.to_string_lossy().into_owned(),
        ]);
    }
    for (enabled, flag) in [
        (config.no_cache, "--no-cache"),
        (config.project_cache, "--project-cache"),
    ] {
        if enabled {
            argv.push(String::from(flag));
        }
    }
    argv
}
/// Returns the full MCP server command line: the configured `sifs` executable
/// path (lossily converted to a string) followed by the `mcp_args` arguments.
fn mcp_server_command(config: &McpConfig) -> Vec<String> {
    let executable = config.sifs_path.to_string_lossy().into_owned();
    std::iter::once(executable)
        .chain(mcp_args(config))
        .collect()
}
/// Builds the arguments for `codex` that register the server:
/// `mcp add <name> -- <server command...>`.
fn codex_add_args(name: &str, config: &McpConfig) -> Vec<String> {
    let prefix = ["mcp", "add", name, "--"];
    let mut args: Vec<String> = prefix.iter().map(|part| (*part).to_owned()).collect();
    args.extend(mcp_server_command(config));
    args
}
/// Builds the arguments for `claude` that register the server:
/// `mcp add-json <name> <server json> --scope <scope>`.
///
/// # Errors
///
/// Propagates any failure from building the server JSON payload.
fn claude_add_args(name: &str, scope: McpScopeArg, config: &McpConfig) -> Result<Vec<String>> {
    let server_json = claude_server_json(config)?;
    let args = vec![
        String::from("mcp"),
        String::from("add-json"),
        String::from(name),
        server_json,
        String::from("--scope"),
        String::from(scope_arg(scope)),
    ];
    Ok(args)
}
/// Renders the `config.toml` snippet that registers the server with Codex.
///
/// The snippet contains a `[mcp_servers.<name>]` table with the `sifs`
/// executable as `command`, the `mcp_args` list as `args`, and fixed startup
/// and tool timeouts.
///
/// The table key is written bare only when `name` is a valid TOML bare key
/// (non-empty, ASCII letters, digits, `_` and `-`). Any other name is emitted
/// as a quoted key; otherwise a name such as `my.server` would be parsed as a
/// nested table and a name with spaces would not parse at all.
fn codex_toml(name: &str, config: &McpConfig) -> String {
    let is_bare_key = !name.is_empty()
        && name
            .chars()
            .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '_' | '-'));
    let table_key = if is_bare_key {
        name.to_owned()
    } else {
        // NOTE(review): relies on `toml_string` output being a valid TOML
        // basic string, as the `command` and `args` values below already do.
        toml_string(name)
    };
    let args = mcp_args(config)
        .iter()
        .map(|arg| toml_string(arg))
        .collect::<Vec<_>>()
        .join(", ");
    format!(
        "[mcp_servers.{table_key}]\ncommand = {}\nargs = [{args}]\nstartup_timeout_sec = 20\ntool_timeout_sec = 60\n",
        toml_string(&config.sifs_path.to_string_lossy())
    )
}
/// Serializes the stdio server definition as compact JSON: `type`, `command`
/// (the `sifs` executable), `args` (from `mcp_args`) and an empty `env` object.
///
/// # Errors
///
/// Propagates a serialization failure from `serde_json`.
fn claude_server_json(config: &McpConfig) -> Result<String> {
    let server = json!({
        "type": "stdio",
        "command": config.sifs_path.to_string_lossy(),
        "args": mcp_args(config),
        "env": {},
    });
    let encoded = serde_json::to_string(&server)?;
    Ok(encoded)
}
/// Serializes a pretty-printed JSON document with a single `mcpServers` entry
/// keyed by `name`, holding the same stdio server definition fields (`type`,
/// `command`, `args`, `env`).
///
/// # Errors
///
/// Propagates a serialization failure from `serde_json`.
fn claude_project_json(name: &str, config: &McpConfig) -> Result<String> {
    let server = json!({
        "type": "stdio",
        "command": config.sifs_path.to_string_lossy(),
        "args": mcp_args(config),
        "env": {},
    });
    let document = json!({
        "mcpServers": {
            name: server
        }
    });
    let encoded = serde_json::to_string_pretty(&document)?;
    Ok(encoded)
}
/// Encodes `value` as a double-quoted, escaped string by rendering it as a
/// JSON string value.
// NOTE(review): callers embed the result in TOML; this presumes the JSON
// escaping is also acceptable as a TOML basic string — confirm for control
// characters.
fn toml_string(value: &str) -> String {
    serde_json::Value::String(value.to_owned()).to_string()
}
/// Maps a scope variant to the lowercase word passed after `--scope`.
fn scope_arg(scope: McpScopeArg) -> &'static str {
    let label: &'static str = match scope {
        McpScopeArg::User => "user",
        McpScopeArg::Project => "project",
        McpScopeArg::Local => "local",
    };
    label
}
/// Reports whether `<command> mcp get <name>` exits successfully.
///
/// The child's output is captured rather than shown. `_scope` is accepted but
/// not used by the lookup.
///
/// # Errors
///
/// Fails only when the command cannot be spawned; a non-zero exit status is
/// reported as `Ok(false)`.
fn mcp_server_exists(command: &str, name: &str, _scope: Option<McpScopeArg>) -> Result<bool> {
    let lookup = ["mcp", "get", name];
    let output = ProcessCommand::new(command)
        .args(lookup)
        .output()
        .with_context(|| format!("run {command} {}", lookup.join(" ")))?;
    let found = output.status.success();
    Ok(found)
}
fn run_checked(command: &str, args: &[&str]) -> Result<()> {
let output = ProcessCommand::new(command)
.args(args)
.output()
.with_context(|| format!("run {command} {}", args.join(" ")))?;
if !output.status.success() {
bail!(
"{} failed: {}",
display_command(&prepend_command(
command,
&args.iter().map(|arg| arg.to_string()).collect::<Vec<_>>()
)),
String::from_utf8_lossy(&output.stderr).trim()
);
}
Ok(())
}
fn run_checked_owned(command: &str, args: &[String]) -> Result<()> {
let output = ProcessCommand::new(command)
.args(args)
.output()
.with_context(|| format!("run {command} {}", args.join(" ")))?;
if !output.status.success() {
bail!(
"{} failed: {}",
display_command(&prepend_command(command, args)),
String::from_utf8_lossy(&output.stderr).trim()
);
}
Ok(())
}
/// Returns a new vector holding `command` followed by a copy of every element
/// of `args`, in order.
fn prepend_command(command: &str, args: &[String]) -> Vec<String> {
    std::iter::once(command.to_owned())
        .chain(args.iter().cloned())
        .collect()
}
/// Renders a command line for display: each part is passed through
/// `shell_quote` and the results are joined with single spaces.
fn display_command(parts: &[String]) -> String {
    let mut rendered = String::new();
    for (index, part) in parts.iter().enumerate() {
        if index > 0 {
            rendered.push(' ');
        }
        rendered.push_str(&shell_quote(part));
    }
    rendered
}
/// Quotes `value` so it can be pasted into a POSIX shell as one argument.
///
/// Non-empty values made up solely of ASCII letters, digits and `/ . _ - : =`
/// are returned unchanged. Everything else is wrapped in single quotes, with
/// each embedded `'` rewritten as `'\''`.
///
/// The empty string is rendered as `''`. Returning it unchanged would make
/// the argument disappear from the displayed command line.
fn shell_quote(value: &str) -> String {
    let is_safe = |ch: char| {
        ch.is_ascii_alphanumeric() || matches!(ch, '/' | '.' | '_' | '-' | ':' | '=')
    };
    // `all` is vacuously true for "", so emptiness must be checked explicitly.
    if !value.is_empty() && value.chars().all(is_safe) {
        value.to_owned()
    } else {
        format!("'{}'", value.replace('\'', "'\\''"))
    }
}
/// Describes, for display, whether `command` was located on `PATH` and where.
fn command_status(command: &str) -> String {
    match command_path(command) {
        Some(path) => format!("found at {}", path.display()),
        None => "not found on PATH".to_owned(),
    }
}
/// Looks up `command` in the directories listed in the `PATH` environment
/// variable and returns the first `<dir>/<command>` that is a file.
///
/// Returns `None` when `PATH` is unset or no directory contains such a file.
fn command_path(command: &str) -> Option<PathBuf> {
    let search_path = std::env::var_os("PATH")?;
    for dir in std::env::split_paths(&search_path) {
        let candidate = dir.join(command);
        if candidate.is_file() {
            return Some(candidate);
        }
    }
    None
}
/// Returns `"no"` when any component of `path` is exactly `target`, `debug`
/// or `release`, and `"yes"` otherwise.
///
/// Components that are not valid UTF-8 never match.
fn stable_binary_path(path: &Path) -> &'static str {
    const BUILD_DIR_NAMES: [&str; 3] = ["target", "debug", "release"];
    let inside_build_dir = path
        .components()
        .filter_map(|component| component.as_os_str().to_str())
        .any(|component| BUILD_DIR_NAMES.contains(&component));
    if inside_build_dir { "no" } else { "yes" }
}
/// Writes a warning to stderr when `stable_binary_path` reports `"no"` for
/// `path`; does nothing otherwise.
fn warn_if_development_binary(path: &Path) {
    let looks_stable = stable_binary_path(path) != "no";
    if looks_stable {
        return;
    }
    eprintln!(
        "Warning: sifs is running from {}. Install with `cargo install --locked sifs` or Homebrew before creating durable MCP config.",
        path.display()
    );
}
/// Runs a cache subcommand against the cache root.
///
/// * `Status` prints the cache summary (root, existence, and entry, file and
///   byte counts) as pretty-printed JSON when `json` is set, otherwise as
///   labelled lines.
/// * `Clean` removes the cache root, or with `dry_run` only reports how many
///   files and bytes would be removed.
///
/// The platform default root is resolved only when no `cache_dir` was given,
/// so an explicit directory works even if the default cannot be determined.
///
/// # Errors
///
/// Fails when the default cache root is needed but cannot be resolved, when
/// the JSON summary cannot be serialized, or when removing the cache fails.
fn run_cache(command: CacheCommand) -> Result<()> {
    match command {
        CacheCommand::Status { cache_dir, json } => {
            // `unwrap_or(platform_cache_root()?)` would evaluate the default
            // (and propagate its error) even when a directory was supplied.
            let root = match cache_dir {
                Some(dir) => dir,
                None => platform_cache_root()?,
            };
            let summary = cache_summary(&root);
            if json {
                println!(
                    "{}",
                    serde_json::to_string_pretty(&serde_json::json!({
                        "root": summary.root,
                        "exists": summary.exists,
                        "entries": summary.entries,
                        "files": summary.files,
                        "bytes": summary.bytes,
                    }))?
                );
            } else {
                println!("Cache root: {}", summary.root.display());
                println!("Exists: {}", summary.exists);
                println!("Entries: {}", summary.entries);
                println!("Files: {}", summary.files);
                println!("Bytes: {}", summary.bytes);
            }
        }
        CacheCommand::Clean { cache_dir, dry_run } => {
            // Same lazy resolution as above.
            let root = match cache_dir {
                Some(dir) => dir,
                None => platform_cache_root()?,
            };
            if dry_run {
                // Only the dry-run report needs the summary.
                let summary = cache_summary(&root);
                println!(
                    "Would remove {} files ({} bytes) from {}.",
                    summary.files,
                    summary.bytes,
                    summary.root.display()
                );
            } else {
                remove_cache_root(&root)?;
                println!("Removed cache: {}", root.display());
            }
        }
    }
    Ok(())
}
/// Recursively deletes the cache root, treating a missing root as success.
///
/// The removal is attempted directly instead of checking `exists()` first.
/// That pre-check is racy and returns `false` on metadata errors, which would
/// report success without removing anything. It also follows symlinks, so a
/// dangling symlink at `root` would be skipped.
///
/// # Errors
///
/// Returns any I/O error from the removal other than `NotFound`.
fn remove_cache_root(root: &Path) -> Result<()> {
    match fs::remove_dir_all(root) {
        // Nothing to remove: the cache is already absent.
        Err(error) if error.kind() == std::io::ErrorKind::NotFound => Ok(()),
        Err(error) => Err(error.into()),
        Ok(()) => Ok(()),
    }
}
/// Writes the bundled agent file to `.claude/agents/sifs-search.md`, relative
/// to the current working directory, creating parent directories as needed.
///
/// When the file already exists and `force` is not set, a message is written
/// to stderr and the process exits with status 1 without touching the file.
///
/// # Errors
///
/// Fails when the directories cannot be created or the file cannot be written.
fn run_init(force: bool) -> Result<()> {
    let agents_dir = Path::new(".claude").join("agents");
    let dest = agents_dir.join("sifs-search.md");

    if !force && dest.exists() {
        eprintln!(
            "{} already exists. Run with --force to overwrite.",
            dest.display()
        );
        std::process::exit(1);
    }

    fs::create_dir_all(&agents_dir)?;
    fs::write(&dest, include_str!("agents/sifs-search.md"))?;
    println!("Created {}", dest.display());
    Ok(())
}
/// Prints the static capability and discovery overview to stdout, one entry
/// per line.
fn print_capabilities() {
    let lines: &[&str] = &[
        "SIFS capabilities:",
        "- Search local directories and Git URLs with hybrid, semantic, or BM25 ranking.",
        "- Print structured CLI output with `--json`, `--jsonl`, or `--format compact`.",
        "- Inspect indexes with `sifs files`, `sifs status`, `sifs get`, and `sifs clean`.",
        "- Find related code from a known file and one-based line.",
        "- Run `sifs mcp` as an MCP server with search, find_related, index_status, refresh_index, clear_index, list_indexed_files, get_chunk, and init_agent tools.",
        "- Run BM25 search without loading or downloading an embedding model.",
        "- Manage embedding models with `sifs model pull` and `sifs model status`.",
        "- Generate a Claude agent file with `sifs init`.",
        "- Use `sifs-benchmark` for quality and latency benchmarks.",
        "- Use `sifs-embed` for embedding diagnostics.",
        "",
        "Discovery:",
        "- `sifs --help` and subcommand help show CLI usage.",
        "- MCP clients can call `tools/list` and read `sifs://server/context`.",
    ];
    let overview = lines.join("\n");
    println!("{}", overview);
}