use clap::Parser;
use engram::memory::Memory;
use engram_server::config::{BackendConfig, EmbeddingBackend, LlmBackend};
use engram_server::handlers;
use engram_server::mcp::{McpServer, McpToolDef};
use engram_server::rest::{self, AppState};
use serde_json::json;
use std::sync::Arc;
// LLM providers the user can choose for the `llm_provider` option of `Cli::Serve`
// (`--llm-provider` / `ENGRAM_LLM_PROVIDER`).
// `build_backend_config` maps each variant to the matching `LlmBackend` variant.
#[derive(Copy, Clone, Debug, clap::ValueEnum)]
enum LlmProvider {
// Built from `ollama_url` and `ollama_llm_model`.
Ollama,
// Spelled `openai-compatible` on the command line; `openai` is accepted as an alias.
// Requires an API key (see `build_backend_config`).
#[value(name = "openai-compatible", alias = "openai")]
OpenAiCompatible,
// Requires an API key (see `build_backend_config`).
Anthropic,
// Requires an API key (see `build_backend_config`).
Google,
// Requires `llm_command` to be set; also carries `llm_command_timeout`.
Command,
// Maps to `LlmBackend::Mock`; needs no further settings.
Mock,
}
// Embedding providers the user can choose for the `embedding_provider` option of
// `Cli::Serve` (`--embedding-provider` / `ENGRAM_EMBEDDING_PROVIDER`).
// `build_backend_config` maps each variant to the matching `EmbeddingBackend` variant.
#[derive(Copy, Clone, Debug, clap::ValueEnum)]
enum EmbeddingProvider {
// Built from `ollama_url`, `embedding_model` and `embedding_dims`.
Ollama,
// Built from `embedding_dims` only.
Mock,
}
// Command-line interface. Every option can also be supplied through the
// environment variable named in its `env = "..."` attribute.
// (A plain comment is used here so the explicit `about` text below stays the
// only description attached to the top-level command.)
#[derive(Parser)]
#[command(name = "engram", about = "Engram — memory layer for AI agents")]
enum Cli {
    /// Run the Engram server in MCP or REST mode
    Serve {
        /// SQLite database file path, or a `postgres://` / `postgresql://` connection URL
        #[arg(long, env = "ENGRAM_DB_PATH", default_value = "engram.db")]
        db: String,

        /// Server mode: `rest` serves the REST API, any other value runs the MCP server
        #[arg(long, env = "ENGRAM_MODE", default_value = "mcp")]
        mode: String,

        /// Port the REST server listens on (only used with `--mode rest`)
        #[arg(long, env = "ENGRAM_PORT", default_value = "9090")]
        port: u16,

        /// LLM provider to use
        #[arg(
            long,
            value_enum,
            env = "ENGRAM_LLM_PROVIDER",
            default_value = "ollama"
        )]
        llm_provider: LlmProvider,

        /// Base URL of the Ollama server (shared by the Ollama LLM and embedding backends)
        #[arg(
            long,
            env = "ENGRAM_OLLAMA_URL",
            default_value = "http://localhost:11434"
        )]
        ollama_url: String,

        /// Model name for the Ollama LLM provider
        #[arg(long, env = "ENGRAM_OLLAMA_LLM_MODEL", default_value = "llama3.2")]
        ollama_llm_model: String,

        /// API key for the `openai-compatible` LLM provider (required when it is selected)
        #[arg(long, env = "OPENAI_API_KEY", hide_env_values = true)]
        openai_api_key: Option<String>,

        /// Base URL for the `openai-compatible` LLM provider
        #[arg(
            long,
            env = "ENGRAM_OPENAI_BASE_URL",
            default_value = "https://api.openai.com/v1"
        )]
        openai_base_url: String,

        /// Model name for the `openai-compatible` LLM provider
        #[arg(long, env = "ENGRAM_OPENAI_MODEL", default_value = "gpt-4o-mini")]
        openai_model: String,

        /// API key for the `anthropic` LLM provider (required when it is selected)
        #[arg(long, env = "ANTHROPIC_API_KEY", hide_env_values = true)]
        anthropic_api_key: Option<String>,

        /// Base URL for the `anthropic` LLM provider
        #[arg(
            long,
            env = "ENGRAM_ANTHROPIC_BASE_URL",
            default_value = "https://api.anthropic.com"
        )]
        anthropic_base_url: String,

        /// Model name for the `anthropic` LLM provider
        #[arg(
            long,
            env = "ENGRAM_ANTHROPIC_MODEL",
            default_value = "claude-haiku-4-5-20251001"
        )]
        anthropic_model: String,

        /// API key for the `google` LLM provider (required when it is selected)
        #[arg(long, env = "GOOGLE_API_KEY", hide_env_values = true)]
        google_api_key: Option<String>,

        /// Base URL for the `google` LLM provider
        #[arg(
            long,
            env = "ENGRAM_GOOGLE_BASE_URL",
            default_value = "https://generativelanguage.googleapis.com/v1beta"
        )]
        google_base_url: String,

        /// Model name for the `google` LLM provider
        #[arg(
            long,
            env = "ENGRAM_GOOGLE_MODEL",
            default_value = "gemini-flash-latest"
        )]
        google_model: String,

        /// Command for the `command` LLM provider (required when it is selected)
        #[arg(long, env = "ENGRAM_LLM_COMMAND")]
        llm_command: Option<String>,

        /// Timeout in seconds for the `command` LLM provider
        #[arg(long, env = "ENGRAM_LLM_COMMAND_TIMEOUT", default_value = "120")]
        llm_command_timeout: u64,

        /// Embedding provider to use
        #[arg(
            long,
            value_enum,
            env = "ENGRAM_EMBEDDING_PROVIDER",
            default_value = "ollama"
        )]
        embedding_provider: EmbeddingProvider,

        /// Model name for the Ollama embedding provider
        #[arg(
            long,
            env = "ENGRAM_EMBEDDING_MODEL",
            default_value = "nomic-embed-text"
        )]
        embedding_model: String,

        /// Number of embedding dimensions handed to the embedding backend
        #[arg(long, env = "ENGRAM_EMBEDDING_DIMS", default_value = "768")]
        embedding_dims: usize,

        /// Whether saving messages should also extract facts (`--extract-on-save=false` to disable)
        // A `bool` field defaults to clap's `SetTrue` action, which takes no value;
        // combined with a default of `true` the option could never be switched off
        // from the command line. `Set` makes it accept an explicit `true`/`false`,
        // while `num_args = 0..=1` + `default_missing_value` keep the bare
        // `--extract-on-save` form working as before.
        #[arg(
            long,
            env = "ENGRAM_EXTRACT_ON_SAVE",
            action = clap::ArgAction::Set,
            num_args = 0..=1,
            default_value = "true",
            default_missing_value = "true"
        )]
        extract_on_save: bool,
    },
}
fn tool_defs() -> Vec<McpToolDef> {
vec![
McpToolDef {
name: "memory_add".into(),
description: "Extract and store facts from conversation messages".into(),
input_schema: json!({
"type": "object",
"properties": {
"messages": {
"type": "array",
"items": {
"type": "object",
"properties": {
"role": { "type": "string" },
"content": { "type": "string" }
},
"required": ["role", "content"]
},
"description": "Conversation messages to extract facts from"
},
"user_id": { "type": "string", "description": "User identifier" },
"org_id": { "type": "string", "description": "Organization identifier" },
"session_id": { "type": "string", "description": "Session identifier" }
},
"required": ["messages"]
}),
},
McpToolDef {
name: "memory_recall".into(),
description: "Semantic search over stored facts".into(),
input_schema: json!({
"type": "object",
"properties": {
"query": { "type": "string", "description": "Search query" },
"user_id": { "type": "string" },
"org_id": { "type": "string" },
"max_results": { "type": "integer", "default": 10 }
},
"required": ["query"]
}),
},
McpToolDef {
name: "memory_context".into(),
description: "Assemble a token-budgeted context block for LLM prompts".into(),
input_schema: json!({
"type": "object",
"properties": {
"query": { "type": "string", "description": "Query to build context for" },
"user_id": { "type": "string" },
"org_id": { "type": "string" },
"token_budget": { "type": "integer", "default": 2000 },
"format": { "type": "string", "enum": ["system_prompt", "markdown", "raw"], "default": "system_prompt" }
},
"required": ["query"]
}),
},
McpToolDef {
name: "memory_forget".into(),
description: "Soft-delete a fact by ID".into(),
input_schema: json!({
"type": "object",
"properties": {
"fact_id": { "type": "string", "description": "UUID of the fact to forget" },
"reason": { "type": "string", "description": "Reason for forgetting" }
},
"required": ["fact_id"]
}),
},
McpToolDef {
name: "memory_search".into(),
description: "Keyword search over stored facts (FTS5)".into(),
input_schema: json!({
"type": "object",
"properties": {
"query": { "type": "string", "description": "Keyword search query" },
"user_id": { "type": "string" },
"org_id": { "type": "string" },
"top_k": { "type": "integer", "default": 10 }
},
"required": ["query"]
}),
},
McpToolDef {
name: "memory_stats".into(),
description: "Return aggregate memory statistics".into(),
input_schema: json!({
"type": "object",
"properties": {}
}),
},
McpToolDef {
name: "memory_consolidate".into(),
description: "Run a memory consolidation cycle (decay, promote, dedup)".into(),
input_schema: json!({
"type": "object",
"properties": {
"user_id": { "type": "string" },
"org_id": { "type": "string" }
}
}),
},
McpToolDef {
name: "messages_save".into(),
description: "Save chat messages to a conversation, optionally extracting facts".into(),
input_schema: json!({
"type": "object",
"properties": {
"conversation_id": { "type": "string", "description": "Conversation identifier" },
"messages": {
"type": "array",
"items": {
"type": "object",
"properties": {
"role": { "type": "string" },
"content": { "type": "string" },
"metadata": { "type": "object" }
},
"required": ["role", "content"]
},
"description": "Chat messages to save"
},
"user_id": { "type": "string", "description": "User identifier" },
"org_id": { "type": "string", "description": "Organization identifier" }
},
"required": ["conversation_id", "messages"]
}),
},
McpToolDef {
name: "messages_get".into(),
description: "Retrieve chat messages from a conversation".into(),
input_schema: json!({
"type": "object",
"properties": {
"conversation_id": { "type": "string", "description": "Conversation identifier" },
"last_n": { "type": "integer", "description": "Only return the last N messages" },
"user_id": { "type": "string" },
"org_id": { "type": "string" }
},
"required": ["conversation_id"]
}),
},
McpToolDef {
name: "messages_list".into(),
description: "List all conversation IDs visible to the given scope".into(),
input_schema: json!({
"type": "object",
"properties": {
"user_id": { "type": "string" },
"org_id": { "type": "string" }
}
}),
},
McpToolDef {
name: "messages_delete".into(),
description: "Delete all messages in a conversation".into(),
input_schema: json!({
"type": "object",
"properties": {
"conversation_id": { "type": "string", "description": "Conversation identifier" },
"user_id": { "type": "string" },
"org_id": { "type": "string" }
},
"required": ["conversation_id"]
}),
},
]
}
/// Translates the parsed command-line / environment settings into a
/// [`BackendConfig`].
///
/// Only the settings that belong to the selected providers end up in the
/// result; the others are dropped unused. `ollama_url` is shared by the Ollama
/// LLM backend and the Ollama embedding backend.
///
/// # Errors
///
/// Returns a human-readable message when the selected LLM provider needs a
/// value that was not supplied: an API key for the OpenAI-compatible,
/// Anthropic and Google providers, or the command for the command provider.
// One parameter per CLI option: the caller hands the fields of `Cli::Serve`
// over as-is, hence the long argument list.
#[allow(clippy::too_many_arguments)]
fn build_backend_config(
    llm_provider: LlmProvider,
    ollama_url: String,
    ollama_llm_model: String,
    openai_api_key: Option<String>,
    openai_base_url: String,
    openai_model: String,
    anthropic_api_key: Option<String>,
    anthropic_base_url: String,
    anthropic_model: String,
    google_api_key: Option<String>,
    google_base_url: String,
    google_model: String,
    llm_command: Option<String>,
    llm_command_timeout: u64,
    embedding_provider: EmbeddingProvider,
    embedding_model: String,
    embedding_dims: usize,
) -> Result<BackendConfig, String> {
    /// Unwraps a mandatory setting, reporting `missing` when it is absent.
    fn require(setting: Option<String>, missing: &str) -> Result<String, String> {
        match setting {
            Some(value) => Ok(value),
            None => Err(missing.to_owned()),
        }
    }

    // The embedding side cannot fail, so resolve it up front. The URL is
    // cloned here because the Ollama LLM arm below takes ownership of it.
    let embedding = match embedding_provider {
        EmbeddingProvider::Ollama => EmbeddingBackend::Ollama {
            base_url: ollama_url.clone(),
            model: embedding_model,
            dims: embedding_dims,
        },
        EmbeddingProvider::Mock => EmbeddingBackend::Mock {
            dims: embedding_dims,
        },
    };

    let llm = match llm_provider {
        LlmProvider::Ollama => LlmBackend::Ollama {
            base_url: ollama_url,
            model: ollama_llm_model,
        },
        LlmProvider::OpenAiCompatible => {
            let api_key = require(
                openai_api_key,
                "openai-compatible LLM provider selected but OPENAI_API_KEY is not set",
            )?;
            LlmBackend::OpenAiCompatible {
                base_url: openai_base_url,
                api_key,
                model: openai_model,
            }
        }
        LlmProvider::Anthropic => {
            let api_key = require(
                anthropic_api_key,
                "Anthropic LLM provider selected but ANTHROPIC_API_KEY is not set",
            )?;
            LlmBackend::Anthropic {
                base_url: anthropic_base_url,
                api_key,
                model: anthropic_model,
            }
        }
        LlmProvider::Google => {
            let api_key = require(
                google_api_key,
                "Google LLM provider selected but GOOGLE_API_KEY is not set",
            )?;
            LlmBackend::Google {
                base_url: google_base_url,
                api_key,
                model: google_model,
            }
        }
        LlmProvider::Command => {
            let command = require(
                llm_command,
                "command LLM provider selected but --llm-command / ENGRAM_LLM_COMMAND is not set",
            )?;
            LlmBackend::Command {
                command,
                timeout_secs: llm_command_timeout,
            }
        }
        LlmProvider::Mock => LlmBackend::Mock,
    };

    Ok(BackendConfig { llm, embedding })
}
#[tokio::main]
async fn main() {
tracing_subscriber::fmt()
.with_writer(std::io::stderr)
.with_env_filter("engram=info")
.init();
let cli = Cli::parse();
match cli {
Cli::Serve {
db,
mode,
port,
llm_provider,
ollama_url,
ollama_llm_model,
openai_api_key,
openai_base_url,
openai_model,
anthropic_api_key,
anthropic_base_url,
anthropic_model,
google_api_key,
google_base_url,
google_model,
llm_command,
llm_command_timeout,
embedding_provider,
embedding_model,
embedding_dims,
extract_on_save,
} => {
if let Some(parent) = std::path::Path::new(&db).parent() {
if !parent.as_os_str().is_empty() {
let _ = std::fs::create_dir_all(parent);
}
}
let config = match build_backend_config(
llm_provider,
ollama_url,
ollama_llm_model,
openai_api_key,
openai_base_url,
openai_model,
anthropic_api_key,
anthropic_base_url,
anthropic_model,
google_api_key,
google_base_url,
google_model,
llm_command,
llm_command_timeout,
embedding_provider,
embedding_model,
embedding_dims,
) {
Ok(c) => c,
Err(e) => {
eprintln!("engram: configuration error — {e}");
std::process::exit(2);
}
};
eprintln!("engram: db = {db}");
eprintln!("engram: llm = {}", config.llm.describe());
eprintln!("engram: embedding = {}", config.embedding.describe());
if config.is_mock() {
eprintln!(
"engram: WARNING — mock backend in use; memory_add will return zero facts."
);
eprintln!(
"engram: WARNING — set ENGRAM_LLM_PROVIDER=ollama|openai|anthropic|google for real extraction."
);
}
eprintln!("engram: extract_on_save = {extract_on_save}");
let memory = if db.starts_with("postgres://") || db.starts_with("postgresql://") {
tracing::info!(" backend: PostgreSQL");
Memory::open_postgres(&db, config.embedding.build())
.await
.expect("failed to open Postgres memory database")
} else {
let db_url = format!("sqlite:{db}?mode=rwc");
tracing::info!(" backend: SQLite ({db})");
Memory::open(&db_url, config.embedding.build())
.await
.expect("failed to open SQLite memory database")
};
let memory = Arc::new(memory);
match mode.as_str() {
"rest" => {
let state = AppState {
memory: memory.clone(),
llm_backend: config.llm.clone(),
extract_on_save,
};
let app = rest::build_router(state);
let addr = format!("0.0.0.0:{port}");
eprintln!("engram: REST server listening on {addr}");
let listener = tokio::net::TcpListener::bind(&addr)
.await
.expect("failed to bind");
axum::serve(listener, app).await.expect("server error");
}
_ => {
eprintln!("engram: MCP server ready");
let defs = tool_defs();
let m = memory.clone();
let llm_backend = config.llm.clone();
let server = McpServer::new()
.tool(defs[0].clone(), {
let m = m.clone();
let lb = llm_backend.clone();
move |args| {
let m = m.clone();
let lb = lb.clone();
async move { handlers::handle_add(m, lb, args).await }
}
})
.tool(defs[1].clone(), {
let m = m.clone();
move |args| {
let m = m.clone();
async move { handlers::handle_recall(m, args).await }
}
})
.tool(defs[2].clone(), {
let m = m.clone();
move |args| {
let m = m.clone();
async move { handlers::handle_context(m, args).await }
}
})
.tool(defs[3].clone(), {
let m = m.clone();
move |args| {
let m = m.clone();
async move { handlers::handle_forget(m, args).await }
}
})
.tool(defs[4].clone(), {
let m = m.clone();
move |args| {
let m = m.clone();
async move { handlers::handle_search(m, args).await }
}
})
.tool(defs[5].clone(), {
let m = m.clone();
move |args| {
let m = m.clone();
async move { handlers::handle_stats(m, args).await }
}
})
.tool(defs[6].clone(), {
let m = m.clone();
move |args| {
let m = m.clone();
async move { handlers::handle_consolidate(m, args).await }
}
})
.tool(defs[7].clone(), {
let m = m.clone();
let lb = llm_backend.clone();
let eos = extract_on_save;
move |args| {
let m = m.clone();
let lb = lb.clone();
async move {
handlers::handle_messages_save(m, lb, eos, args).await
}
}
})
.tool(defs[8].clone(), {
let m = m.clone();
move |args| {
let m = m.clone();
async move { handlers::handle_messages_get(m, args).await }
}
})
.tool(defs[9].clone(), {
let m = m.clone();
move |args| {
let m = m.clone();
async move { handlers::handle_messages_list(m, args).await }
}
})
.tool(defs[10].clone(), {
let m = m.clone();
move |args| {
let m = m.clone();
async move { handlers::handle_messages_delete(m, args).await }
}
});
if let Err(e) = server.run().await {
eprintln!("engram: server error: {e}");
std::process::exit(1);
}
}
}
}
}
}