1use crate::{tantivy_error, DictxSchema, EntryPackWriter, ENTRY_PACK_FILE, SCHEMA_VERSION};
2use dictx_core::{DictEntry, Result};
3use serde::{Deserialize, Serialize};
4use sha2::{Digest, Sha256};
5use std::fs;
6use std::io::Read;
7use std::path::{Path, PathBuf};
8use std::time::Instant;
9use tantivy::Index;
10
/// Caller-supplied settings for building an index.
#[derive(Debug, Clone)]
pub struct BuildOptions {
    /// Requested indexing memory budget, in mebibytes.
    pub ram_mb: usize,
    /// Overwrite flag.
    ///
    /// NOTE(review): the build path visible in this file does not read this
    /// field; confirm where (or whether) it is enforced.
    pub force: bool,
    /// Name of the dictionary source; copied into the index metadata.
    pub source_name: String,
    /// Optional path of the source file; copied into the index metadata.
    pub source_path: Option<PathBuf>,
}

impl Default for BuildOptions {
    /// Returns options with a 128 MiB budget, `force` disabled, the source
    /// name `"default"`, and no source path.
    fn default() -> Self {
        Self {
            ram_mb: 128,
            force: false,
            source_name: String::from("default"),
            source_path: None,
        }
    }
}
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct IndexMetadata {
32 pub schema_version: u32,
33 pub source_name: String,
34 pub source_path: Option<PathBuf>,
35 pub source_sha256: Option<String>,
36 pub entries: usize,
37 pub built_at_unix: u64,
38 pub index_bytes: u64,
39 pub entry_pack: Option<String>,
40 pub entry_pack_bytes: Option<u64>,
41}
42
/// Summary figures reported after an index build completes.
#[derive(Debug, Clone)]
pub struct BuildStats {
    /// Number of entries added to the index.
    pub entries: usize,
    /// Wall-clock duration of the build, in milliseconds.
    pub elapsed_ms: u128,
    /// Total size in bytes of the files in the index directory.
    pub index_bytes: u64,
}
49
50pub fn open_index(path: &Path) -> Result<Index> {
51 Index::open_in_dir(path).map_err(tantivy_error)
52}
53
54pub fn build_index<I>(index_dir: &Path, entries: I, options: &BuildOptions) -> Result<BuildStats>
55where
56 I: IntoIterator<Item = Result<DictEntry>>,
57{
58 if index_dir.exists() {
61 fs::remove_dir_all(index_dir)?;
62 }
63 fs::create_dir_all(index_dir)?;
64
65 let dictx_schema = DictxSchema::build();
66 let index =
67 Index::create_in_dir(index_dir, dictx_schema.schema.clone()).map_err(tantivy_error)?;
68
69 let mut writer = index
70 .writer((options.ram_mb.max(16) * 1024 * 1024) as usize)
71 .map_err(tantivy_error)?;
72 let mut pack = EntryPackWriter::create(&index_dir.join(ENTRY_PACK_FILE))?;
73 let start = Instant::now();
74 let mut count = 0usize;
75
76 for entry in entries {
77 let entry = entry?;
78 let locator = pack.append(&entry)?;
79 let doc = dictx_schema.to_document(&entry, Some(locator))?;
80 writer.add_document(doc).map_err(tantivy_error)?;
81 count += 1;
82 }
83
84 pack.finish()?;
85 writer.commit().map_err(tantivy_error)?;
86 writer.wait_merging_threads().map_err(tantivy_error)?;
87
88 let index_bytes = dir_size(index_dir)?;
89 let stats = BuildStats {
90 entries: count,
91 elapsed_ms: start.elapsed().as_millis(),
92 index_bytes,
93 };
94 write_metadata(index_dir, options, &stats)?;
95 Ok(stats)
96}
97
98pub fn read_metadata(index_dir: &Path) -> Result<Option<IndexMetadata>> {
99 let path = metadata_path(index_dir);
100 if !path.exists() {
101 return Ok(None);
102 }
103 let text = fs::read_to_string(path)?;
104 Ok(Some(serde_json::from_str(&text)?))
105}
106
107fn write_metadata(index_dir: &Path, options: &BuildOptions, stats: &BuildStats) -> Result<()> {
108 let metadata = IndexMetadata {
109 schema_version: SCHEMA_VERSION,
110 source_name: options.source_name.clone(),
111 source_path: options.source_path.clone(),
112 source_sha256: options
113 .source_path
114 .as_deref()
115 .and_then(|path| sha256_file(path).ok()),
116 entries: stats.entries,
117 built_at_unix: std::time::SystemTime::now()
118 .duration_since(std::time::UNIX_EPOCH)
119 .unwrap_or_default()
120 .as_secs(),
121 index_bytes: stats.index_bytes,
122 entry_pack: Some(ENTRY_PACK_FILE.to_string()),
123 entry_pack_bytes: Some(
124 fs::metadata(index_dir.join(ENTRY_PACK_FILE))
125 .map(|metadata| metadata.len())
126 .unwrap_or(0),
127 ),
128 };
129 fs::write(
130 metadata_path(index_dir),
131 serde_json::to_string_pretty(&metadata)?,
132 )?;
133 Ok(())
134}
135
/// Returns the location of the metadata file inside `index_dir`.
fn metadata_path(index_dir: &Path) -> PathBuf {
    let mut location = index_dir.to_path_buf();
    location.push("dictx-meta.json");
    location
}
139
140pub fn dir_size(path: &Path) -> Result<u64> {
141 let mut total = 0u64;
142 for entry in walkdir::WalkDir::new(path) {
143 let entry = entry.map_err(|err| dictx_core::DictxError::Message(err.to_string()))?;
144 if entry.file_type().is_file() {
145 total += entry
146 .metadata()
147 .map_err(|err| dictx_core::DictxError::Message(err.to_string()))?
148 .len();
149 }
150 }
151 Ok(total)
152}
153
154fn sha256_file(path: &Path) -> Result<String> {
155 let mut file = fs::File::open(path)?;
156 let mut hasher = Sha256::new();
157 let mut buf = [0u8; 64 * 1024];
158 loop {
159 let read = file.read(&mut buf)?;
160 if read == 0 {
161 break;
162 }
163 hasher.update(&buf[..read]);
164 }
165 Ok(format!("{:x}", hasher.finalize()))
166}
167
#[cfg(test)]
mod tests {
    use super::*;
    use dictx_core::{Definition, DictSource};

    /// Builds a one-entry index in a temporary directory, then checks the
    /// reported entry count and that the metadata file was written.
    #[test]
    fn builds_small_index() {
        let workdir = tempfile::tempdir().unwrap();
        let index_dir = workdir.path();

        let mut apple = DictEntry::new(DictSource::Custom { name: "t".into() }, "apple");
        apple
            .definitions
            .push(Definition::new("fruit", "苹果", Some("n".into())));

        let options = BuildOptions {
            force: true,
            ..BuildOptions::default()
        };
        let stats = build_index(index_dir, vec![Ok(apple)], &options).unwrap();

        assert_eq!(stats.entries, 1);
        assert!(index_dir.join("dictx-meta.json").exists());
    }
}