mod ai;
mod config;
mod interaction;
mod output;
use anyhow::{anyhow, Context, Result};
use clap::{Args, Parser, Subcommand, ValueEnum};
use config::{AppConfig, SourceConfig};
use dictx_core::{Query, SearchFilters, SearchRequest};
use dictx_index::{build_index, read_metadata, BuildOptions};
use dictx_parser::{parser_for, write_dxdict, DxdictMetadata};
use dictx_search::{parse_lookup_query, parse_search_query, DictSearcher, SearchResult};
use indicatif::{ProgressBar, ProgressStyle};
use output::{print_entries, print_json_result, print_source_list};
use std::fs;
use std::io::IsTerminal;
use std::path::PathBuf;
/// Default value of every `-n` / `--limit` option declared in this file.
const DEFAULT_DISPLAY_LIMIT: usize = 3;
/// Lower bound on the per-index fetch size that `run_search` applies when the
/// output format is rich or plain and `--raw` is not set.
const SEARCH_PREFETCH_LIMIT: usize = 48;
// Top-level command-line interface, parsed through clap's derive API.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help text, which would change the program's output.
#[derive(Debug, Parser)]
#[command(
name = "dictx",
version,
about = "Rust 终端离线词库 + 可选在线 AI API 词典"
)]
struct Cli {
// --config: optional configuration file path. Global (valid on every
// subcommand) and also read from the DICTX_CONFIG environment variable.
#[arg(long, global = true, env = "DICTX_CONFIG")]
config: Option<PathBuf>,
// --no-color: global flag; callers combine it as
// `!no_color && config.output.color` to decide whether to colorize.
#[arg(long, global = true)]
no_color: bool,
// Optional subcommand; `main` falls back to `config show` when it is absent.
#[command(subcommand)]
command: Option<Command>,
}
// Top-level subcommands; `main` dispatches on these variants.
// (`//` rather than `///` so clap does not pick the text up as help output.)
#[derive(Debug, Subcommand)]
enum Command {
// Handled by `init`: writes or updates the configuration file.
Init(InitArgs),
// Runs a search built from `parse_lookup_query`; also reachable as `l`.
#[command(alias = "l")]
Lookup(LookupArgs),
// Runs a search whose query is `Query::Chinese { text }`.
Zh(ZhArgs),
// Runs a search built from `parse_search_query` plus `SearchFilters`.
Search(SearchArgs),
// Handled by `build_sources`.
Build(BuildArgs),
// Handled by `source_command`.
Source(SourceCommand),
// Handled by `config_command`; `config show` is also the default command.
Config(ConfigCommand),
// Forwarded to `ai::run_ai`.
Ai(AiArgs),
}
// Arguments for `dictx init`.
#[derive(Debug, Args)]
struct InitArgs {
// -f/--force: when set, `init` skips the "config already exists" handling
// and writes a freshly detected configuration.
#[arg(short, long)]
force: bool,
}
// Arguments for `dictx lookup` (alias `l`).
#[derive(Debug, Args)]
struct LookupArgs {
// Positional: the text handed to `parse_lookup_query`.
word: String,
// -s/--source: passed to `run_search` as the source filter.
#[arg(short, long)]
source: Option<String>,
// -p/--pos: copied into `request.filters.pos`.
#[arg(short, long)]
pos: Option<String>,
// -f/--format: output format; defaults to `OutputFormat::Rich`.
#[arg(short, long, value_enum, default_value_t = OutputFormat::Rich)]
format: OutputFormat,
// -n/--limit: copied into `request.limit`; defaults to DEFAULT_DISPLAY_LIMIT.
#[arg(short = 'n', long, default_value_t = DEFAULT_DISPLAY_LIMIT)]
limit: usize,
// --raw: forwarded to `run_search`, where it disables the prefetch
// enlargement and interactive mode and is passed on to the printers.
#[arg(long)]
raw: bool,
}
// Arguments for `dictx zh`.
#[derive(Debug, Args)]
struct ZhArgs {
// Positional: becomes the `text` of `Query::Chinese`.
text: String,
// -s/--source: passed to `run_search` as the source filter.
#[arg(short, long)]
source: Option<String>,
// -n/--limit: copied into `request.limit`; defaults to DEFAULT_DISPLAY_LIMIT.
#[arg(short = 'n', long, default_value_t = DEFAULT_DISPLAY_LIMIT)]
limit: usize,
// -f/--format: output format; defaults to `OutputFormat::Rich`.
#[arg(short, long, value_enum, default_value_t = OutputFormat::Rich)]
format: OutputFormat,
}
// Arguments for `dictx search`. Apart from `query`, `source`, `limit`,
// `offset` and `format`, every field is copied verbatim into `SearchFilters`.
#[derive(Debug, Args)]
struct SearchArgs {
// Positional: parsed by `parse_search_query` together with
// `config.search.fuzzy_distance`.
query: String,
// -s/--source: passed to `run_search` as the source filter
// (`SearchFilters::source` itself is set to `None` by `main`).
#[arg(short, long)]
source: Option<String>,
// -p/--pos: copied into `SearchFilters::pos`.
#[arg(short, long)]
pos: Option<String>,
// -t/--tag: copied into `SearchFilters::tag`.
#[arg(short, long)]
tag: Option<String>,
// --collins-min: copied into `SearchFilters::collins_min`.
#[arg(long)]
collins_min: Option<u8>,
// --freq-min: copied into `SearchFilters::freq_min`.
#[arg(long)]
freq_min: Option<u32>,
// --freq-max: copied into `SearchFilters::freq_max`.
#[arg(long)]
freq_max: Option<u32>,
// --oxford-only: copied into `SearchFilters::oxford_only`.
#[arg(long)]
oxford_only: bool,
// -n/--limit: copied into `request.limit`; defaults to DEFAULT_DISPLAY_LIMIT.
#[arg(short = 'n', long, default_value_t = DEFAULT_DISPLAY_LIMIT)]
limit: usize,
// --offset: copied into `request.offset`; defaults to 0.
#[arg(long, default_value_t = 0)]
offset: usize,
// -f/--format: output format; defaults to `OutputFormat::Rich`.
#[arg(short, long, value_enum, default_value_t = OutputFormat::Rich)]
format: OutputFormat,
}
// Arguments for `dictx build`.
#[derive(Debug, Args)]
struct BuildArgs {
// -s/--source: passed to `AppConfig::enabled_sources` to narrow what is built.
#[arg(short, long)]
source: Option<String>,
// -f/--force: forwarded as `BuildOptions::force`.
#[arg(short, long)]
force: bool,
// --ram: forwarded as `BuildOptions::ram_mb`; defaults to 128.
#[arg(long, default_value_t = 128)]
ram: usize,
}
// Wrapper for `dictx source <subcommand>`; only carries the nested subcommand.
#[derive(Debug, Args)]
struct SourceCommand {
#[command(subcommand)]
command: SourceSubcommand,
}
// Subcommands of `dictx source`, dispatched in `source_command`.
#[derive(Debug, Subcommand)]
enum SourceSubcommand {
// Prints the configured sources via `print_source_list`.
List,
// Prints the static recommendation text (`print_recommended_sources`).
Recommend,
// Registers a source that points at the given path as-is.
Add(SourceAddArgs),
// Converts a file to `.dxdict` under `config.dict_dir` and registers it.
Import(SourceImportArgs),
// Writes `.dxdict` output without saving any configuration change.
Pack(SourcePackArgs),
// Drops the source with this name from the configuration.
Remove { name: String },
// Prints the source's TOML entry and its index metadata, if built.
Info { name: String },
}
// Arguments for `dictx source add`: registers a source that points at `path`
// without copying or converting it.
// (`//` rather than `///` so clap does not turn these notes into help text.)
#[derive(Debug, Args)]
struct SourceAddArgs {
    // Positional: unique source name (duplicates are rejected by `source_command`).
    name: String,
    // Positional: location of the dictionary file, stored verbatim in the config.
    path: PathBuf,
    // -f/--format: parser format identifier stored in `SourceConfig::format`.
    #[arg(short = 'f', long)]
    format: String,
    // --display: optional display name stored in `SourceConfig::display`.
    #[arg(long)]
    display: Option<String>,
    // --enabled[=true|false]: whether the new source is enabled; defaults to true.
    // A plain `bool` flag uses `ArgAction::SetTrue`, which combined with a `true`
    // default could never yield `false`. `Set` plus an optional `=value` keeps the
    // bare `--enabled` spelling working while allowing `--enabled=false`.
    // `require_equals` stops the flag from swallowing the following positional.
    #[arg(
        long,
        default_value_t = true,
        action = clap::ArgAction::Set,
        num_args = 0..=1,
        require_equals = true,
        default_missing_value = "true"
    )]
    enabled: bool,
}
// Arguments for `dictx source import`: converts `path` into a managed
// `.dxdict` file and registers it in the configuration.
// (`//` rather than `///` so clap does not turn these notes into help text.)
#[derive(Debug, Args)]
struct SourceImportArgs {
    // Positional: source name; also names the target directory and file.
    name: String,
    // Positional: dictionary file to convert.
    path: PathBuf,
    // -f/--format: parser format used to read `path`.
    #[arg(short = 'f', long)]
    format: String,
    // --display: optional display name stored in the metadata and the config.
    #[arg(long)]
    display: Option<String>,
    // --enabled[=true|false]: whether the imported source is enabled; defaults
    // to true. A plain `bool` flag uses `ArgAction::SetTrue`, which combined with
    // a `true` default could never yield `false`. `Set` plus an optional `=value`
    // keeps the bare `--enabled` spelling working while allowing `--enabled=false`.
    // `require_equals` stops the flag from swallowing the following positional.
    #[arg(
        long,
        default_value_t = true,
        action = clap::ArgAction::Set,
        num_args = 0..=1,
        require_equals = true,
        default_missing_value = "true"
    )]
    enabled: bool,
    // --force: allow replacing an existing target file / source entry.
    #[arg(long)]
    force: bool,
}
// Arguments for `dictx source pack`.
#[derive(Debug, Args)]
struct SourcePackArgs {
// Positional: dictionary file to read.
input: PathBuf,
// Positional: output path; with `--shard-size` it only supplies the parent
// directory and file stem for the shard files.
output: PathBuf,
// -f/--format: parser format used to read `input`.
#[arg(short = 'f', long)]
format: String,
// --name: metadata name; `source_command` falls back to the output file
// stem, and to "dictx" when that is unavailable.
#[arg(long)]
name: Option<String>,
// --display: optional display name written into the metadata.
#[arg(long)]
display: Option<String>,
// --force: skip the "target already exists" check.
#[arg(long)]
force: bool,
// --shard-size: when given, output goes through `write_dxdict_shards`
// with this many entries per shard (0 is rejected there).
#[arg(long)]
shard_size: Option<usize>,
}
// Wrapper for `dictx config <subcommand>`; only carries the nested subcommand.
#[derive(Debug, Args)]
struct ConfigCommand {
#[command(subcommand)]
command: ConfigSubcommand,
}
// Subcommands of `dictx config`, dispatched in `config_command`.
#[derive(Debug, Subcommand)]
enum ConfigSubcommand {
// Prints the whole configuration as pretty TOML.
Show,
// Prints the resolved configuration file path.
Path,
// Prints the config path plus `dict_dir` and `index_dir`.
Paths,
// Prints the value returned by `AppConfig::get_key`; unknown keys are an error.
Get { key: String },
// Calls `AppConfig::set_key` and saves the configuration.
Set { key: String, value: String },
}
// Arguments for `dictx ai`; the whole struct is handed to `ai::run_ai`.
// NOTE(review): how `context` and `model` are interpreted is defined in the
// `ai` module, which is not visible from this file.
#[derive(Debug, Args)]
struct AiArgs {
// Positional text argument.
text: String,
// --context: optional.
#[arg(long)]
context: Option<String>,
// --model: optional.
#[arg(long)]
model: Option<String>,
}
// Output formats selectable with `-f/--format`; see `run_search` for how each
// one is rendered.
#[derive(Debug, Clone, Copy, ValueEnum, PartialEq, Eq)]
enum OutputFormat {
// `print_entries` with optional color, followed by `interaction::run` when
// stdout is a terminal and `--raw` is off.
Rich,
// `print_entries` without color and without interactive mode.
Plain,
// `print_json_result`.
Json,
}
fn main() -> Result<()> {
let cli = Cli::parse();
let command = cli.command.unwrap_or(Command::Config(ConfigCommand {
command: ConfigSubcommand::Show,
}));
match command {
Command::Init(args) => init(cli.config, args.force),
Command::Lookup(args) => {
let config = AppConfig::load_or_create(cli.config.as_deref())?;
let mut request = SearchRequest::new(parse_lookup_query(&args.word));
request.limit = args.limit;
request.filters.pos = args.pos;
run_search(
&config,
args.source.as_deref(),
request,
args.format,
args.raw,
cli.no_color,
)
}
Command::Zh(args) => {
let config = AppConfig::load_or_create(cli.config.as_deref())?;
let mut request = SearchRequest::new(Query::Chinese { text: args.text });
request.limit = args.limit;
run_search(
&config,
args.source.as_deref(),
request,
args.format,
false,
cli.no_color,
)
}
Command::Search(args) => {
let config = AppConfig::load_or_create(cli.config.as_deref())?;
let mut request = SearchRequest::new(parse_search_query(
&args.query,
config.search.fuzzy_distance,
));
request.limit = args.limit;
request.offset = args.offset;
request.filters = SearchFilters {
source: None,
pos: args.pos,
tag: args.tag,
collins_min: args.collins_min,
freq_min: args.freq_min,
freq_max: args.freq_max,
oxford_only: args.oxford_only,
};
run_search(
&config,
args.source.as_deref(),
request,
args.format,
false,
cli.no_color,
)
}
Command::Build(args) => {
let config = AppConfig::load_or_create(cli.config.as_deref())?;
build_sources(&config, args)
}
Command::Source(args) => source_command(cli.config, args.command),
Command::Config(args) => config_command(cli.config, args.command),
Command::Ai(args) => {
let config = AppConfig::load_or_create(cli.config.as_deref())?;
ai::run_ai(&config, args, !cli.no_color && config.output.color)
}
}
}
/// Implements `dictx init`.
///
/// * No config file yet, or `force` set: detect sources, write a fresh
///   configuration and print the follow-up instructions. Fails when nothing
///   was detected.
/// * Config exists with sources: fails with a summary of what is configured.
/// * Config exists without sources: fills in the detected sources and saves;
///   fails when nothing was detected.
fn init(config_path: Option<PathBuf>, force: bool) -> Result<()> {
    let path = AppConfig::resolve_path(config_path.as_deref())?;

    if force || !path.exists() {
        let fresh = AppConfig::default_with_detected_sources()?;
        if fresh.sources.is_empty() {
            return Err(anyhow!(
                "没有自动发现词典源。\n请把词库放入 DictX 词库目录后运行 dictx init --force,或使用 dictx source import/add 手动添加。"
            ));
        }
        fresh.save_to(&path)?;
        println!("已写入配置: {}", path.display());
        print_init_next_steps(&fresh);
        return Ok(());
    }

    let mut existing = AppConfig::load_or_create(Some(&path))?;

    if !existing.sources.is_empty() {
        let listing: Vec<String> = existing
            .sources
            .iter()
            .map(|entry| format!(" - {} ({})", entry.name, entry.format))
            .collect();
        return Err(anyhow!(
            "配置已存在: {}\n当前已配置 {} 个词典源:\n{}\n\n常用操作:\n - 查看详情: dictx source list\n - 构建索引: dictx build\n - 重新自动发现并覆盖: dictx init --force",
            path.display(),
            existing.sources.len(),
            listing.join("\n")
        ));
    }

    // The file exists but lists no sources: try to repair it in place.
    let detected = AppConfig::default_with_detected_sources()?.sources;
    if detected.is_empty() {
        return Err(anyhow!(
            "配置已存在但没有词典源: {}\n当前目录、父目录及 DictX 词库目录没有发现可用词库。\n请使用 dictx source import <NAME> <PATH> --format <FORMAT> 导入,或使用 dictx source add <NAME> <PATH> --format <FORMAT> 引用外部文件。",
            path.display()
        ));
    }
    existing.sources = detected;
    existing.save_to(&path)?;
    println!("已更新配置: {}", path.display());
    print_init_next_steps(&existing);
    Ok(())
}
/// Runs `request` against every enabled source that has an index and prints
/// the merged result in the requested `format`.
///
/// * `source_filter` is passed to `AppConfig::enabled_sources`.
/// * `request.limit` is raised to at least 1 and doubles as the display limit.
/// * For rich/plain output without `raw`, each index is asked for at least
///   `SEARCH_PREFETCH_LIMIT` hits.
/// * Sources whose index directory is missing are skipped; if none could be
///   searched, the error from `missing_index_error` is returned.
/// * Hits are ordered by descending score, ties broken by `word_lower`.
///
/// # Errors
/// Fails when no source is enabled, when an index cannot be opened or
/// searched, or when printing/interaction fails.
fn run_search(
    config: &AppConfig,
    source_filter: Option<&str>,
    mut request: SearchRequest,
    format: OutputFormat,
    raw: bool,
    no_color: bool,
) -> Result<()> {
    let sources = config.enabled_sources(source_filter);
    if sources.is_empty() {
        return Err(anyhow!(
            "没有可用词典源。请先运行 dictx init 或 dictx source add。"
        ));
    }

    request.limit = request.limit.max(1);
    let display_limit = request.limit;

    let human_readable = matches!(format, OutputFormat::Rich | OutputFormat::Plain);
    let mut fetch_request = request.clone();
    if human_readable && !raw {
        fetch_request.limit = display_limit.max(SEARCH_PREFETCH_LIMIT);
    }

    let mut hits = Vec::new();
    let mut elapsed = 0u128;
    let mut searched_sources = 0usize;
    let mut missing_indexes = Vec::new();
    for source in sources {
        let index_dir = config.index_dir_for(&source.name);
        if index_dir.exists() {
            let searcher = DictSearcher::open(&index_dir)
                .with_context(|| format!("打开索引失败: {}", index_dir.display()))?;
            let partial = searcher.search(&fetch_request)?;
            searched_sources += 1;
            elapsed += partial.elapsed_ms;
            hits.extend(partial.entries);
        } else {
            missing_indexes.push((source.name.clone(), index_dir));
        }
    }
    if searched_sources == 0 {
        return Err(missing_index_error(source_filter, &missing_indexes));
    }

    // Best score first; incomparable scores are treated as equal.
    hits.sort_by(|a, b| {
        b.score
            .partial_cmp(&a.score)
            .unwrap_or(std::cmp::Ordering::Equal)
            .then_with(|| a.entry.word_lower.cmp(&b.entry.word_lower))
    });

    // `total` counts everything collected, before the cut below.
    let total = hits.len();
    hits.truncate(fetch_request.limit);
    let result = SearchResult {
        entries: hits,
        total,
        elapsed_ms: elapsed,
    };

    match format {
        OutputFormat::Json => print_json_result(&request, &result, raw),
        OutputFormat::Plain => {
            print_entries(&request, &result, false, raw, display_limit, false);
            Ok(())
        }
        OutputFormat::Rich => {
            let color = !no_color && config.output.color;
            // Interactive mode needs a terminal and is never used with `raw`.
            let interactive = !raw && std::io::stdout().is_terminal();
            let rendered =
                print_entries(&request, &result, color, raw, display_limit, interactive);
            if interactive {
                interaction::run(config, &result, rendered, color)
            } else {
                Ok(())
            }
        }
    }
}
/// Builds the error returned when no enabled source has a usable index.
///
/// The message lists every `(name, index directory)` pair in
/// `missing_indexes` (the section is omitted when the slice is empty) and
/// suggests the `dictx build` invocation matching `source_filter`.
fn missing_index_error(
    source_filter: Option<&str>,
    missing_indexes: &[(String, PathBuf)],
) -> anyhow::Error {
    let missing_section = if missing_indexes.is_empty() {
        String::new()
    } else {
        let listing: String = missing_indexes
            .iter()
            .map(|(name, path)| format!(" - {name}: {}\n", path.display()))
            .collect();
        format!("\n缺少索引:\n{listing}")
    };

    let build_command = match source_filter {
        Some(source) => format!("dictx build --source {source}"),
        None => String::from("dictx build"),
    };

    let message = format!(
        "没有可查询索引。已启用词典源,但当前索引目录还没有构建完成。\n{missing_section}\n请先运行: {build_command}\n如果刚执行过 dictx init 或 dictx init --force,这一步是首次查询前必须执行的索引构建。"
    );
    anyhow!(message)
}
/// Implements `dictx build`: validates, parses and indexes every enabled
/// source selected by `args.source`, one after another.
///
/// A spinner is shown while a source is being parsed and indexed; a summary
/// line (entry count, index size, elapsed milliseconds) is printed after each
/// successful build.
///
/// # Errors
/// Stops at the first failure: no matching source, an unknown format, a
/// failed validation, or a parse/index error.
fn build_sources(config: &AppConfig, args: BuildArgs) -> Result<()> {
    fs::create_dir_all(&config.index_dir)?;

    let sources = config.enabled_sources(args.source.as_deref());
    if sources.is_empty() {
        return Err(anyhow!("没有匹配的启用词典源"));
    }

    for source in sources {
        let parser = parser_for(&source.format)?;

        let report = parser.validate(&source.path)?;
        if !report.valid {
            let issues = report.issues.join("; ");
            return Err(anyhow!("数据源 {} 校验失败: {}", source.name, issues));
        }

        let index_dir = config.index_dir_for(&source.name);

        // Fall back to the stock spinner style if the template is rejected.
        let style = ProgressStyle::with_template("{spinner:.cyan} {msg}")
            .unwrap_or_else(|_| ProgressStyle::default_spinner());
        let spinner = ProgressBar::new_spinner();
        spinner.set_style(style);
        spinner.enable_steady_tick(std::time::Duration::from_millis(80));
        spinner.set_message(format!(
            "构建 {} ({})...",
            source.display_name(),
            source_location(&source)
        ));

        let entries = parser.parse(&source.path)?;
        let options = BuildOptions {
            ram_mb: args.ram,
            force: args.force,
            source_name: source.name.clone(),
            source_path: Some(source.path.clone()),
        };
        let stats = build_index(&index_dir, entries, &options)?;

        spinner.finish_and_clear();
        println!(
            "✓ {}: {} 条,{},{} ms",
            source.display_name(),
            stats.entries,
            human_bytes(stats.index_bytes),
            stats.elapsed_ms
        );
    }

    Ok(())
}
fn source_command(config_path: Option<PathBuf>, command: SourceSubcommand) -> Result<()> {
let path = AppConfig::resolve_path(config_path.as_deref())?;
let mut config = AppConfig::load_or_create(Some(&path))?;
match command {
SourceSubcommand::List => {
print_source_list(&config)?;
}
SourceSubcommand::Recommend => print_recommended_sources(),
SourceSubcommand::Add(args) => {
if config.sources.iter().any(|source| source.name == args.name) {
return Err(anyhow!("词典源已存在: {}", args.name));
}
config.sources.push(SourceConfig {
name: args.name,
path: args.path,
format: args.format,
display: args.display,
enabled: args.enabled,
});
config.save_to(&path)?;
println!("已添加词典源");
}
SourceSubcommand::Import(args) => {
let source_path = args.path.canonicalize().with_context(|| {
format!("无法读取词典文件,请检查路径: {}", args.path.display())
})?;
let parser = parser_for(&args.format)?;
let report = parser.validate(&source_path)?;
if !report.valid {
return Err(anyhow!("词典文件校验失败: {}", report.issues.join("; ")));
}
let filename = source_path
.file_name()
.ok_or_else(|| anyhow!("词典路径缺少文件名: {}", source_path.display()))?;
let target_dir = config.dict_dir.join(&args.name);
let target_path = target_dir.join(format!("{}.dxdict", args.name));
if target_path.exists() && !args.force {
return Err(anyhow!(
"目标词库已存在: {}\n如需覆盖,请追加 --force。",
target_path.display()
));
}
if config.sources.iter().any(|source| source.name == args.name) && !args.force {
return Err(anyhow!(
"词典源已存在: {}\n如需替换,请追加 --force。",
args.name
));
}
fs::create_dir_all(&target_dir)?;
let count = write_dxdict(
&target_path,
&DxdictMetadata {
name: args.name.clone(),
display: args.display.clone(),
source_format: Some(args.format.clone()),
},
parser.parse(&source_path)?,
)?;
let imported = SourceConfig {
name: args.name,
path: target_path.clone(),
format: "dxdict".to_string(),
display: args.display,
enabled: args.enabled,
};
config.sources.retain(|source| source.name != imported.name);
config.sources.push(imported);
config.save_to(&path)?;
println!(
"已导入词典源: {} ({} 条,来源 {})",
target_path.display(),
count,
filename.to_string_lossy()
);
println!(
"下一步: dictx build --source {}",
config.sources.last().unwrap().name
);
}
SourceSubcommand::Pack(args) => {
let source_path = args.input.canonicalize().with_context(|| {
format!("无法读取词典文件,请检查路径: {}", args.input.display())
})?;
if args.output.exists() && !args.force {
return Err(anyhow!(
"目标文件已存在: {}\n如需覆盖,请追加 --force。",
args.output.display()
));
}
if let Some(parent) = args.output.parent() {
fs::create_dir_all(parent)?;
}
let parser = parser_for(&args.format)?;
let report = parser.validate(&source_path)?;
if !report.valid {
return Err(anyhow!("词典文件校验失败: {}", report.issues.join("; ")));
}
let name = args.name.unwrap_or_else(|| {
args.output
.file_stem()
.and_then(|value| value.to_str())
.unwrap_or("dictx")
.to_string()
});
let metadata = DxdictMetadata {
name,
display: args.display,
source_format: Some(args.format),
};
if let Some(shard_size) = args.shard_size {
let shards = write_dxdict_shards(
&args.output,
&metadata,
parser.parse(&source_path)?,
shard_size,
)?;
let total = shards.iter().map(|(_, count)| *count).sum::<usize>();
println!("已生成 DictX 专有词库分片:");
for (path, count) in &shards {
println!(" - {} ({} 条)", path.display(), count);
}
println!("词条数: {total}");
} else {
let count = write_dxdict(&args.output, &metadata, parser.parse(&source_path)?)?;
println!("已生成 DictX 专有词库: {}", args.output.display());
println!("词条数: {count}");
}
}
SourceSubcommand::Remove { name } => {
let before = config.sources.len();
config.sources.retain(|source| source.name != name);
if config.sources.len() == before {
return Err(anyhow!("没有找到词典源: {name}"));
}
config.save_to(&path)?;
println!("已移除词典源: {name}");
}
SourceSubcommand::Info { name } => {
let source = config
.sources
.iter()
.find(|source| source.name == name)
.ok_or_else(|| anyhow!("没有找到词典源: {name}"))?;
println!("{}", toml::to_string_pretty(source)?);
if let Some(meta) = read_metadata(&config.index_dir_for(&source.name))? {
println!(
"索引: {} 条, {}",
meta.entries,
human_bytes(meta.index_bytes)
);
} else {
println!("索引: 未构建");
}
}
}
Ok(())
}
/// Prints the post-`init` summary: config file, dictionary and index
/// directories, the configured sources, and the suggested next commands.
///
/// NOTE(review): the config file line is derived from
/// `AppConfig::resolve_path(None)`, not from the path the caller actually
/// wrote — confirm the two agree when `--config` is supplied.
fn print_init_next_steps(config: &AppConfig) {
    let config_file = match AppConfig::resolve_path(None) {
        Ok(resolved) => resolved.display().to_string(),
        Err(_) => "<未知>".to_string(),
    };
    println!("配置文件: {}", config_file);
    println!("词库目录: {}", config.dict_dir.display());
    println!("索引目录: {}", config.index_dir.display());
    println!();

    println!("已发现 {} 个词典源:", config.sources.len());
    for source in &config.sources {
        let location = source_location(source);
        println!(" - {} ({}) {}", source.name, source.format, location);
    }
    println!();

    let guidance = [
        "下一步:",
        " 1. 构建索引: dictx build",
        " 2. 查询测试: dictx lookup apple / dictx zh 老师",
        " 3. 查看词源: dictx source list",
        " 4. 导入词库: dictx source import ecdict /path/to/ecdict.csv --format ecdict",
        " 5. 获取更多开放词库: dictx source recommend",
        "",
        "注意: init 只写入配置,不会自动生成索引;首次查询前必须先运行 dictx build。",
    ];
    for line in guidance.iter() {
        println!("{}", line);
    }
}
/// Prints the static list of recommended open dictionaries, including the
/// matching `dictx source import` commands, to stdout.
fn print_recommended_sources() {
    // One element per output line; empty strings are blank separator lines.
    const LINES: &[&str] = &[
        "推荐开放词库:",
        "",
        "1. ECDICT 英汉双解",
        " 用途: 英->中主词库,含音标、词频、考试标签、柯林斯/牛津标注",
        " 许可: MIT",
        " 地址: https://github.com/skywind3000/ECDICT",
        " 导入: dictx source import ecdict /path/to/ecdict.csv --format ecdict",
        "",
        "2. CC-CEDICT 汉英词典",
        " 用途: 中->英补充词库,适合中文词、短语、拼音查询",
        " 许可: CC BY-SA",
        " 地址: https://cc-cedict.org/wiki/",
        " 导入: dictx source import cc_cedict /path/to/cedict_ts.u8 --format cedict",
        "",
        "3. open-dict-data",
        " 用途: 多语言开放词典集合,可作为后续适配来源",
        " 地址: https://open-dict-data.github.io/",
        "",
        "说明:",
        " - DictX 发布包已内置新世纪汉英与金山词霸词库,可直接 init + build。",
        " - source import 会将外部词库转换为 DictX 专有 .dxdict 格式后纳入管理目录。",
        " - source pack 只生成 .dxdict 文件,不修改配置,适合批量整理词库。",
    ];
    for line in LINES {
        println!("{}", line);
    }
}
/// Implements `dictx config <subcommand>`.
///
/// The configuration is resolved and loaded for every subcommand; only `set`
/// writes it back.
///
/// # Errors
/// Fails when the configuration cannot be resolved, loaded, serialized or
/// saved, when `get` names an unknown key, or when `set_key` rejects the
/// update.
fn config_command(config_path: Option<PathBuf>, command: ConfigSubcommand) -> Result<()> {
    let path = AppConfig::resolve_path(config_path.as_deref())?;
    let mut config = AppConfig::load_or_create(Some(&path))?;

    match command {
        ConfigSubcommand::Show => {
            let rendered = toml::to_string_pretty(&config)?;
            println!("{}", rendered);
        }
        ConfigSubcommand::Path => {
            println!("{}", path.display());
        }
        ConfigSubcommand::Paths => {
            println!("config = {}", path.display());
            println!("dict_dir = {}", config.dict_dir.display());
            println!("index_dir = {}", config.index_dir.display());
        }
        ConfigSubcommand::Get { key } => match config.get_key(&key) {
            Some(value) => println!("{}", value),
            None => return Err(anyhow!("未知配置项: {key}")),
        },
        ConfigSubcommand::Set { key, value } => {
            config.set_key(&key, &value)?;
            config.save_to(&path)?;
            println!("已更新 {key}");
        }
    }

    Ok(())
}
/// Formats a byte count with binary (1024-based) units.
///
/// Counts below 1024 are printed exactly, e.g. `"512 B"`. Larger counts are
/// printed with one decimal in the largest fitting unit up to TB, e.g.
/// `"1.5 KB"`.
///
/// The unit is chosen from the value *as it will be displayed*: a count that
/// rounds up to 1024.0 of one unit is shown as 1.0 of the next, so
/// `1_048_575` yields `"1.0 MB"` rather than `"1024.0 KB"`.
fn human_bytes(bytes: u64) -> String {
    const UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
    let mut value = bytes as f64;
    let mut unit = 0;
    // Compare the one-decimal rounding of `value`, not `value` itself, so the
    // printed number never reaches 1024.0 while a larger unit is available.
    while unit < UNITS.len() - 1 && (value * 10.0).round() / 10.0 >= 1024.0 {
        value /= 1024.0;
        unit += 1;
    }
    if unit == 0 {
        // Plain bytes are printed as an exact integer.
        format!("{} {}", bytes, UNITS[unit])
    } else {
        format!("{value:.1} {}", UNITS[unit])
    }
}
/// Returns the text shown for a source's location: paths starting with
/// `builtin:` are prefixed with a "built-in package" label, every other path
/// is rendered as-is.
fn source_location(source: &SourceConfig) -> String {
    let raw = source.path.to_string_lossy();
    if !raw.starts_with("builtin:") {
        return source.path.display().to_string();
    }
    format!("内置包 {raw}")
}
/// Writes `entries` as a sequence of `.dxdict` shard files holding at most
/// `shard_size` entries each, named by `shard_path(output, index)`.
///
/// Returns the `(path, entry count)` of every shard written, in order. At
/// least one shard is always written, even when `entries` is empty.
///
/// # Errors
/// Fails when `shard_size` is 0, when an entry in `entries` is an error, or
/// when `write_dxdict` fails. Shards written before the failure are left in
/// place.
fn write_dxdict_shards<I>(
    output: &std::path::Path,
    metadata: &DxdictMetadata,
    entries: I,
    shard_size: usize,
) -> Result<Vec<(PathBuf, usize)>>
where
    I: IntoIterator<Item = dictx_core::Result<dictx_core::DictEntry>>,
{
    if shard_size == 0 {
        return Err(anyhow!("--shard-size 必须大于 0"));
    }

    let mut written: Vec<(PathBuf, usize)> = Vec::new();
    // Cap the up-front allocation; the buffer still grows to `shard_size`.
    let mut pending = Vec::with_capacity(shard_size.min(8192));

    for item in entries {
        pending.push(item?);
        if pending.len() < shard_size {
            continue;
        }
        // The next shard index always equals the number of shards so far.
        let target = shard_path(output, written.len());
        let count = write_dxdict(&target, metadata, pending.drain(..).map(Ok))?;
        written.push((target, count));
    }

    // Flush the remainder, or emit a single empty shard for empty input.
    if written.is_empty() || !pending.is_empty() {
        let target = shard_path(output, written.len());
        let count = write_dxdict(&target, metadata, pending.drain(..).map(Ok))?;
        written.push((target, count));
    }

    Ok(written)
}
/// Derives the path of shard number `shard_index` from `output`:
/// `<parent of output>/<stem of output>-<index>.dxdict`, with the index
/// zero-padded to at least three digits.
///
/// The stem falls back to `"dictx"` when `output` has no file stem or the
/// stem is not valid UTF-8; a missing parent yields a bare file name.
fn shard_path(output: &std::path::Path, shard_index: usize) -> PathBuf {
    let stem = match output.file_stem().and_then(|raw| raw.to_str()) {
        Some(text) => text,
        None => "dictx",
    };
    let file_name = format!("{stem}-{shard_index:03}.dxdict");
    match output.parent() {
        Some(dir) => dir.join(file_name),
        None => PathBuf::from(file_name),
    }
}