//! `rk-mem` — ReasonKit Mem utilities (clap version)
//!
//! Docset ingestion (Cursor-like `@Docs`) without requiring `reasonkit-org`.
//!
//! This version uses clap for argument parsing and follows ReasonKit CLI standards.
// Top-level argument parser for the `rk-mem` binary.
//
// Combines the shared ReasonKit base flags (`CliBase`, flattened) with the
// tool-specific subcommands. Plain `//` comments are used on fields so that
// clap's generated `--help` output is unchanged (clap turns `///` field docs
// into help text).
#[derive(Parser)]
#[command(name = "rk-mem", about = "ReasonKit Memory utilities", version)]
struct Cli {
    // Shared ReasonKit CLI flags, merged into this command's interface.
    #[command(flatten)]
    base: CliBase,
    // Which subcommand to run (see `Commands`).
    #[command(subcommand)]
    command: Commands,
}
// Top-level subcommands of `rk-mem`.
//
// `///` comments on variants/fields are clap help text and are kept
// byte-identical; review notes use `//` so help output is unaffected.
#[derive(Subcommand)]
enum Commands {
    /// Docset ingestion and refresh (Cursor-like @Docs)
    Docs {
        // Nested `docs` actions (add/list/query/remove/refresh).
        #[command(subcommand)]
        action: DocsAction,
    },
    /// Show help information
    Help {
        /// Show help for a specific command
        command: Option<String>,
    },
}
// Actions under `rk-mem docs ...`.
//
// `///` comments on variants/fields are clap help text and are kept
// byte-identical; review notes use `//` so help output is unaffected.
#[derive(Subcommand)]
enum DocsAction {
    /// Add a new docset
    Add {
        /// Name of the docset
        name: String,
        /// Starting URL for the docset
        start_url: String,
        /// Allowed URL prefixes (defaults to start_url if not specified)
        // Positional and variadic: `Option<Vec<String>>` lets clap accept
        // zero-or-more trailing prefix arguments.
        allowed_prefixes: Option<Vec<String>>,
    },
    /// List all docsets
    List,
    /// Query documents in docsets
    Query {
        /// Search query
        query: String,
        /// Filter by docset name or ID
        #[arg(long)]
        docset: Option<String>,
        /// Number of results to return
        // Exposed as `-k` / `--top-k`; defaults to 8 results.
        #[arg(short = 'k', long, default_value = "8")]
        top_k: usize,
        /// Output in JSON format
        #[arg(long)]
        json: bool,
    },
    /// Remove a docset
    Remove {
        /// Docset ID to remove
        // Expected to be a UUID; validated at execution time.
        docset_id: String,
        /// Keep the index (don't delete indexed documents)
        #[arg(long)]
        keep_index: bool,
    },
    /// Refresh docsets
    Refresh {
        /// Only refresh docsets whose refresh policy is due
        #[arg(long)]
        due: bool,
        /// Maximum pages to fetch per docset
        // `None` keeps the ingestor's default.
        #[arg(long)]
        max_pages: Option<usize>,
        /// Concurrency level for fetching
        // `None` keeps the ingestor's default.
        #[arg(long)]
        concurrency: Option<usize>,
        /// Request timeout in seconds
        // `None` keeps the ingestor's default.
        #[arg(long)]
        timeout_secs: Option<u64>,
    },
}
/// One search hit produced by `docs query`, serialized as-is for `--json`
/// output and used for the text rendering.
#[derive(Debug, Serialize)]
struct DocsQueryHit {
    /// Combined relevance score from the retriever.
    score: f32,
    /// Sparse (lexical) component of the score, when available.
    sparse_score: Option<f32>,
    /// Document UUID, stringified.
    doc_id: String,
    /// Chunk UUID within the document, stringified.
    chunk_id: String,
    /// Source URL of the document, if recorded.
    url: Option<String>,
    /// Document title, if recorded.
    title: Option<String>,
    /// Docset name, parsed from the `docset:` tag, if present.
    docset: Option<String>,
    /// Docset UUID, parsed from the `docset_id:` tag, if present.
    docset_id: Option<String>,
    /// Matched chunk text.
    text: String,
}
/// Resolve the on-disk data directory for rk-mem.
///
/// The `RKMEM_DATA_DIR` environment variable takes precedence when set;
/// otherwise this falls back to `<platform local data dir>/reasonkit/mem`,
/// or `./reasonkit/mem` when the platform directory cannot be determined.
fn data_dir() -> PathBuf {
    match std::env::var("RKMEM_DATA_DIR") {
        Ok(dir) => PathBuf::from(dir),
        Err(_) => dirs::data_local_dir()
            .unwrap_or_else(|| PathBuf::from("."))
            .join("reasonkit")
            .join("mem"),
    }
}
/// Create (or update) a docset configuration and report the saved entry.
///
/// When `allowed_prefixes` is `None`, crawling is restricted to the
/// `start_url` itself.
async fn cmd_docs_add(
    store: &DocsetStore,
    name: String,
    start_url: String,
    allowed_prefixes: Option<Vec<String>>,
) -> anyhow::Result<()> {
    // Default the prefix allow-list to the starting URL.
    let prefixes = match allowed_prefixes {
        Some(p) => p,
        None => vec![start_url.clone()],
    };
    let saved = store.upsert(Docset::new(name, start_url, prefixes)).await?;
    let message = format!("Added docset: {} ({})", saved.name, saved.id);
    format_success(&message, None, OutputFormat::Text)?;
    Ok(())
}
async fn cmd_docs_list(store: &DocsetStore) -> anyhow::Result<()> {
let docsets = store.load().await?;
if docsets.is_empty() {
println!("No docsets configured.");
return Ok(());
}
let now = Utc::now();
let mut output = String::new();
output.push_str(§ion_header("Docset List", true));
for ds in docsets {
let due = ds.is_due(now);
output.push_str(&format!(
"\n{}\n name: {}\n start_url: {}\n refresh: {:?}\n status: {:?}\n due: {}\n",
ds.id, ds.name, ds.start_url, ds.refresh, ds.status, due
));
}
println!("{}", output);
Ok(())
}
/// Remove a docset configuration and, unless `keep_index` is set, delete any
/// indexed documents tagged with the docset's id.
///
/// # Errors
/// Fails if `docset_id` is not a valid UUID, if the docset does not exist,
/// or if the index cannot be opened/listed. Individual document deletions
/// are best-effort and do not abort the operation.
async fn cmd_docs_remove(
    store: &DocsetStore,
    data_dir: &Path,
    docset_id: String,
    keep_index: bool,
) -> anyhow::Result<()> {
    let id = Uuid::parse_str(&docset_id).context("Invalid DOCSET_ID")?;
    let deleted = store.delete(id).await?;
    if !deleted {
        return Err(anyhow!("Docset not found: {}", id));
    }
    format_success(&format!("Removed docset config: {}", id), None, OutputFormat::Text)?;
    if keep_index {
        return Ok(());
    }
    // Best-effort: delete any indexed documents tagged with this docset id.
    let retriever = open_default_docset_retriever(data_dir.to_path_buf()).await?;
    let ctx = AccessContext::new(
        "rk-mem".to_string(),
        AccessLevel::Admin,
        "docs_remove".to_string(),
    );
    let doc_ids = retriever
        .storage()
        .list_documents(&ctx)
        .await
        .context("Failed to list documents")?;
    let wanted_tag = format!("docset_id:{id}");
    let mut removed_docs = 0usize;
    for doc_id in doc_ids {
        let doc = retriever
            .storage()
            .get_document(&doc_id, &ctx)
            .await
            .context("Failed to read document")?;
        let Some(doc) = doc else {
            continue;
        };
        if doc.metadata.tags.iter().any(|t| t == &wanted_tag) {
            // Only count deletions that actually succeeded; previously a
            // failed best-effort delete was still reported as removed.
            if retriever.delete_document(&doc_id).await.is_ok() {
                removed_docs += 1;
            }
        }
    }
    println!("Removed {} document(s) from index.", removed_docs);
    Ok(())
}
/// Search the docset index and print up to `top_k` hits (text or JSON).
///
/// `docset_filter` accepts either a docset UUID or a docset name (matched
/// case-insensitively against the configured docsets). Only documents
/// carrying a `docset_id:` tag are returned.
///
/// # Errors
/// Fails if the filter names an unknown docset, or if the index cannot be
/// opened, searched, or read.
async fn cmd_docs_query(
    store: &DocsetStore,
    data_dir: &Path,
    query: String,
    docset_filter: Option<String>,
    top_k: usize,
    json: bool,
) -> anyhow::Result<()> {
    // Resolve the optional filter to a docset UUID (UUID first, then name).
    let wanted_docset_id = if let Some(filter) = docset_filter {
        if let Ok(id) = Uuid::parse_str(&filter) {
            Some(id)
        } else {
            let docsets = store.load().await?;
            let Some(found) = docsets.iter().find(|d| d.name.eq_ignore_ascii_case(&filter)) else {
                return Err(anyhow!(
                    "Unknown docset: {} (expected name or UUID; see `rk-mem docs list`)",
                    filter
                ));
            };
            Some(found.id)
        }
    } else {
        None
    };
    let retriever = open_default_docset_retriever(data_dir.to_path_buf()).await?;
    let ctx = AccessContext::new(
        "rk-mem".to_string(),
        AccessLevel::Admin,
        "docs_query".to_string(),
    );
    // Over-fetch, then filter to docset-tagged docs.
    let candidate_k = if top_k >= 200 {
        top_k
    } else {
        top_k.saturating_mul(5).min(200)
    };
    let results = retriever.search_sparse(&query, candidate_k).await?;
    // Cache documents by id. Documents are inserted once and then borrowed;
    // the previous version cloned the whole Document on every cache hit.
    let mut doc_cache: HashMap<Uuid, Option<reasonkit_mem::Document>> = HashMap::new();
    let mut hits: Vec<DocsQueryHit> = Vec::with_capacity(top_k);
    for r in results {
        if hits.len() >= top_k {
            break;
        }
        if !doc_cache.contains_key(&r.doc_id) {
            let loaded = retriever
                .storage()
                .get_document(&r.doc_id, &ctx)
                .await
                .context("Failed to read document")?;
            doc_cache.insert(r.doc_id, loaded);
        }
        // Missing documents (None in cache) are skipped silently.
        let Some(doc) = doc_cache.get(&r.doc_id).and_then(Option::as_ref) else {
            continue;
        };
        // Restrict to docset docs by default: require a `docset_id:` tag.
        let Some(docset_id_tag) = doc
            .metadata
            .tags
            .iter()
            .find(|t| t.starts_with("docset_id:"))
            .cloned()
        else {
            continue;
        };
        if let Some(wanted) = wanted_docset_id {
            let wanted_tag = format!("docset_id:{wanted}");
            if !doc.metadata.tags.iter().any(|t| t == &wanted_tag) {
                continue;
            }
        }
        let docset_tag = doc
            .metadata
            .tags
            .iter()
            .find(|t| t.starts_with("docset:"))
            .map(|t| t.trim_start_matches("docset:").to_string());
        hits.push(DocsQueryHit {
            score: r.score,
            sparse_score: r.sparse_score,
            doc_id: r.doc_id.to_string(),
            chunk_id: r.chunk_id.to_string(),
            url: doc.source.url.clone(),
            title: doc.metadata.title.clone(),
            docset: docset_tag,
            docset_id: Some(docset_id_tag.trim_start_matches("docset_id:").to_string()),
            text: r.text,
        });
    }
    if json {
        println!("{}", serde_json::to_string_pretty(&hits)?);
        return Ok(());
    }
    if hits.is_empty() {
        println!("No results.");
        return Ok(());
    }
    println!("{}", section_header("Search Results", true));
    for (i, h) in hits.iter().enumerate() {
        let url = h.url.as_deref().unwrap_or("<no-url>");
        let title = h.title.as_deref().unwrap_or("<no-title>");
        println!(
            "#{:02} score={:.4} docset={} url={}\n {}\n",
            i + 1,
            h.score,
            h.docset.as_deref().unwrap_or("<unknown>"),
            url,
            title
        );
    }
    Ok(())
}
/// Refresh configured docsets, re-ingesting their pages and persisting each
/// docset's updated status and timestamps.
///
/// Optional overrides (`max_pages`, `concurrency`, `timeout_secs`) replace
/// the ingestor defaults only when supplied; `due` limits the run to
/// docsets whose refresh policy says they are due.
async fn cmd_docs_refresh(
    store: &DocsetStore,
    data_dir: &Path,
    due: bool,
    max_pages: Option<usize>,
    concurrency: Option<usize>,
    timeout_secs: Option<u64>,
) -> anyhow::Result<()> {
    // Start from the defaults and apply only the caller-supplied overrides.
    let mut opts = DocsetIngestOptions {
        manifest_dir: Some(data_dir.join("docsets")),
        refresh_due_only: due,
        ..Default::default()
    };
    if let Some(pages) = max_pages {
        opts.max_pages = pages;
    }
    if let Some(workers) = concurrency {
        opts.concurrency = workers;
    }
    if let Some(secs) = timeout_secs {
        opts.request_timeout = std::time::Duration::from_secs(secs);
    }
    let mut docsets = store.load().await?;
    if docsets.is_empty() {
        println!("No docsets configured.");
        return Ok(());
    }
    let retriever = open_default_docset_retriever(data_dir.to_path_buf()).await?;
    let ingestor = DocsetIngestor::new(retriever)?;
    println!("{}", section_header("Refreshing Docsets", true));
    for ds in &mut docsets {
        match ingestor.ingest_docset(ds, &opts).await {
            Ok(report) => {
                println!(
                    "Docset {} ({}) — method={}, discovered={}, fetched={}, indexed={}, skipped={}, removed={}, failures={}",
                    report.docset_name,
                    report.docset_id,
                    report.discovery_method,
                    report.discovered_urls,
                    report.fetched_pages,
                    report.indexed_pages,
                    report.skipped_unchanged,
                    report.removed_pages,
                    report.failures
                );
            }
            Err(e) => {
                // Record the failure on the docset so `docs list` shows it.
                ds.status = RefreshStatus::Error {
                    at: Utc::now(),
                    message: e.to_string(),
                };
                eprintln!("Docset {} ({}) — ERROR: {}", ds.name, ds.id, e);
            }
        }
        // Persist latest status / timestamps (best-effort).
        let _ = store.upsert(ds.clone()).await;
    }
    format_success("Refresh complete", None, OutputFormat::Text)?;
    Ok(())
}