use std::path::PathBuf;
use anyhow::{Context, bail};
use clap::Parser;
use tracing_subscriber::EnvFilter;
use zeph_llm::any::AnyProvider;
use zeph_llm::claude::ClaudeProvider;
use zeph_llm::ollama::OllamaProvider;
use zeph_llm::openai::OpenAiProvider;
use zeph_skills::loader::{SkillMeta, load_skill_meta};
use zeph_skills::miner::{MiningConfig, SkillMiner};
// Command-line arguments for the `zeph-skills-miner` binary.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macro turns
// `///` doc comments into help text, so adding them would change `--help` output.
#[derive(Parser, Debug)]
#[command(
name = "zeph-skills-miner",
about = "Mine skills from GitHub repositories"
)]
struct Cli {
// Path to the TOML config file. When absent, `main` falls back to
// `default_config_path()`.
#[arg(long, value_name = "PATH")]
config: Option<PathBuf>,
// Search queries given via `--query`. When none are given, `main` uses
// `skills.mining.queries` from the config instead.
#[arg(long = "query", value_name = "QUERY")]
queries: Vec<String>,
// Output directory. Takes precedence over `skills.mining.output_dir` and the
// first entry of `skills.paths` from the config (see `main`).
#[arg(long, value_name = "DIR")]
output: Option<PathBuf>,
// Forwarded to `MiningConfig::dry_run`; also adds a ", dry-run" marker to the
// per-skill summary lines printed by `main`.
#[arg(long)]
dry_run: bool,
// Selects "debug" instead of "info" as the fallback log level when no filter
// can be built from the environment.
#[arg(long)]
verbose: bool,
}
/// Entry point of the skill miner CLI.
///
/// Sequence: parse flags, install logging, load the TOML config, resolve the GitHub
/// token, build the generation and embedding providers, pick the output directory and
/// the search queries, load the already-known skills, run the miner, and print one
/// summary line (plus any warnings) per mined skill.
///
/// # Errors
///
/// Fails when the config cannot be read or parsed, the GitHub token is missing, a
/// provider cannot be built, no output directory or query is configured, the output
/// directory cannot be created, or the miner itself reports an error.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let cli = Cli::parse();

    // A filter taken from the environment wins; `--verbose` only picks the fallback level.
    let filter = EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| EnvFilter::new(if cli.verbose { "debug" } else { "info" }));
    tracing_subscriber::fmt().with_env_filter(filter).init();

    let config_path = cli.config.unwrap_or_else(default_config_path);
    tracing::info!(path = %config_path.display(), "loading config");
    let raw_config = std::fs::read_to_string(&config_path)
        .with_context(|| format!("failed to read config from {}", config_path.display()))?;
    let config: zeph_config::Config = toml::from_str(&raw_config)
        .with_context(|| format!("failed to parse config from {}", config_path.display()))?;

    let github_token = resolve_github_token()?;

    let mining = &config.skills.mining;

    // An empty embedding provider name means "reuse the generation provider".
    let generation_name = mining.generation_provider.as_str();
    let embedding_name = mining.embedding_provider.as_str();
    let generation_provider = build_provider(&config, generation_name)
        .context("failed to build generation provider")?;
    let embed_provider = if !embedding_name.is_empty() {
        build_provider(&config, embedding_name).context("failed to build embedding provider")?
    } else {
        generation_provider.clone()
    };

    // Precedence: --output flag, then skills.mining.output_dir, then the first skills path.
    let output_dir = match (cli.output, &mining.output_dir, config.skills.paths.first()) {
        (Some(dir), _, _) => dir,
        (None, Some(dir), _) => expand_tilde(dir),
        (None, None, Some(first_path)) => expand_tilde(first_path),
        (None, None, None) => {
            bail!("no output directory configured; set skills.mining.output_dir or skills.paths")
        }
    };
    // NOTE(review): the directory is created even with --dry-run; confirm whether the
    // miner needs it to exist in that mode before changing this.
    std::fs::create_dir_all(&output_dir)
        .with_context(|| format!("failed to create output dir {}", output_dir.display()))?;

    // Queries from the command line replace (not extend) the configured ones.
    let queries: Vec<String> = if cli.queries.is_empty() {
        mining.queries.clone()
    } else {
        cli.queries
    };
    if queries.is_empty() {
        bail!("no search queries configured; set skills.mining.queries or pass --query");
    }

    let existing_skills = load_existing_skills(&config);
    tracing::info!(
        existing = existing_skills.len(),
        "loaded existing skills for dedup"
    );

    let miner = SkillMiner::new(
        generation_provider,
        embed_provider,
        github_token,
        MiningConfig {
            queries,
            max_repos_per_query: mining.max_repos_per_query.min(100),
            dedup_threshold: mining.dedup_threshold,
            output_dir: output_dir.clone(),
            rate_limit_rpm: mining.rate_limit_rpm,
            dry_run: cli.dry_run,
        },
    )?;

    if cli.dry_run {
        tracing::info!("dry-run mode: no files will be written");
    }
    tracing::info!(output = %output_dir.display(), "starting skill mining");
    let results = miner.run(&existing_skills).await?;
    tracing::info!(mined = results.len(), "mining complete");

    let suffix = if cli.dry_run { ", dry-run" } else { "" };
    for mined in &results {
        println!(
            "[{}] {} (nearest_sim={:.2}{})",
            mined.repo, mined.skill.name, mined.nearest_similarity, suffix
        );
        for w in &mined.skill.warnings {
            println!(" WARNING: {w}");
        }
    }

    Ok(())
}
/// Returns the config file location used when `--config` is not given:
/// `<config dir>/zeph/config.toml`, where `<config dir>` comes from `dirs::config_dir()`.
///
/// NOTE(review): when `dirs::config_dir()` yields `None`, the fallback is the literal
/// string `~/.config`; nothing in this function expands the tilde.
fn default_config_path() -> PathBuf {
    let mut path = match dirs::config_dir() {
        Some(dir) => dir,
        None => PathBuf::from("~/.config"),
    };
    path.push("zeph");
    path.push("config.toml");
    path
}
/// Reads the GitHub token from the `ZEPH_GITHUB_TOKEN` environment variable.
///
/// # Errors
///
/// Returns an error with export instructions when the variable is unset, unreadable,
/// or empty.
fn resolve_github_token() -> anyhow::Result<String> {
    // An unset or non-UTF-8 variable is treated exactly like an empty one.
    let token = std::env::var("ZEPH_GITHUB_TOKEN").unwrap_or_default();
    if token.is_empty() {
        bail!(
            "GitHub token not found. Export it before running:\n \
             export ZEPH_GITHUB_TOKEN=$(zeph vault get ZEPH_GITHUB_TOKEN)"
        );
    }
    Ok(token)
}
/// Builds an [`AnyProvider`] from an entry of `[[llm.providers]]`.
///
/// An empty `name` selects the first configured entry. Otherwise the first entry whose
/// effective name or provider type string equals `name` is used.
///
/// # Errors
///
/// Returns an error when no entry matches (or none is configured), or when the selected
/// entry's provider type is not Claude, OpenAI, or Ollama.
fn build_provider(config: &zeph_config::Config, name: &str) -> anyhow::Result<AnyProvider> {
    let providers = &config.llm.providers;
    let entry = match name {
        "" => providers
            .first()
            .context("no providers configured in [[llm.providers]]")?,
        _ => providers
            .iter()
            .find(|candidate| {
                candidate.effective_name() == name || candidate.provider_type.as_str() == name
            })
            .with_context(|| format!("provider '{name}' not found in [[llm.providers]]"))?,
    };

    // Settings shared by every backend; missing values fall back to defaults.
    let model = entry.effective_model();
    let max_tokens = entry.max_tokens.unwrap_or(4096);
    let api_key = entry.api_key.clone().unwrap_or_default();

    match entry.provider_type {
        zeph_config::ProviderKind::Claude => Ok(AnyProvider::Claude(ClaudeProvider::new(
            api_key, model, max_tokens,
        ))),
        zeph_config::ProviderKind::OpenAi => {
            let embedding_model = entry.embedding_model.clone();
            let base_url = entry
                .base_url
                .clone()
                .unwrap_or_else(|| "https://api.openai.com/v1".into());
            let openai = OpenAiProvider::new(
                api_key,
                base_url,
                model,
                max_tokens,
                embedding_model,
                None,
            );
            Ok(AnyProvider::OpenAi(openai))
        }
        zeph_config::ProviderKind::Ollama => {
            let embed_model = entry
                .embedding_model
                .clone()
                .unwrap_or_else(|| "nomic-embed-text".into());
            let base_url = entry
                .base_url
                .as_deref()
                .unwrap_or("http://localhost:11434");
            Ok(AnyProvider::Ollama(OllamaProvider::new(
                base_url,
                model,
                embed_model,
            )))
        }
        other => {
            bail!("unsupported provider type '{other}' in miner; use claude, openai, or ollama")
        }
    }
}
fn expand_tilde(path: &str) -> PathBuf {
if let Some(stripped) = path.strip_prefix("~/") {
dirs::home_dir()
.map(|h| h.join(stripped))
.unwrap_or_else(|| PathBuf::from(path))
} else {
PathBuf::from(path)
}
}
/// Collects metadata for the skills already present under `config.skills.paths`.
///
/// Each configured path is tilde-expanded and scanned one level deep: every
/// sub-directory that contains a `SKILL.md` file is loaded with `load_skill_meta`.
///
/// This is best-effort and never fails:
/// * a configured path that does not exist is skipped silently;
/// * a path that exists but cannot be listed (permission denied, not a directory, ...)
///   is skipped with a warning;
/// * a `SKILL.md` that fails to load is skipped with a warning.
fn load_existing_skills(config: &zeph_config::Config) -> Vec<SkillMeta> {
    let mut skills = Vec::new();
    for path_str in &config.skills.paths {
        let base = expand_tilde(path_str);
        let entries = match std::fs::read_dir(&base) {
            Ok(entries) => entries,
            // A configured path that has not been created yet is expected; stay quiet.
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
            Err(e) => {
                tracing::warn!(
                    path = %base.display(),
                    error = %e,
                    "skipping unreadable skills directory"
                );
                continue;
            }
        };
        // Entries that fail to read individually are dropped by `flatten`.
        for entry in entries.flatten() {
            let skill_dir = entry.path();
            if !skill_dir.is_dir() {
                continue;
            }
            let skill_path = skill_dir.join("SKILL.md");
            if !skill_path.exists() {
                continue;
            }
            match load_skill_meta(&skill_path) {
                Ok(meta) => skills.push(meta),
                Err(e) => tracing::warn!(
                    path = %skill_path.display(),
                    error = %e,
                    "skipping invalid skill"
                ),
            }
        }
    }
    skills
}