1use std::collections::{HashMap, HashSet};
2use std::fs;
3use std::path::Path;
4
5use rayon::prelude::*;
6use sha2::{Digest, Sha256};
7
8use crate::error::{Error, Result};
9use crate::search::bm25;
10use crate::types::{DocEntry, Entry, Registry, SearchIndex, SkillEntry};
11
12use super::discovery::{discover_author, load_author_registry};
13
/// Options controlling how the registry is built and how build output is written.
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so callers can
/// duplicate a configuration and compare two configurations directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BuildOptions {
    /// Optional base URL; `build_registry` copies it verbatim into the registry.
    pub base_url: Option<String>,
    /// Validation-only flag. It is not read by the code visible in this file.
    /// NOTE(review): presumably callers skip writing output when set — confirm.
    pub validate_only: bool,
    /// When `true`, `write_build_output_with_opts` hashes every content file and
    /// consults the previous build manifest to avoid re-copying unchanged files.
    pub incremental: bool,
}
23
24impl Default for BuildOptions {
25 fn default() -> Self {
26 Self {
27 base_url: None,
28 validate_only: false,
29 incremental: true,
30 }
31 }
32}
33
34#[derive(Debug)]
36pub struct BuildResult {
37 pub registry: Registry,
38 pub search_index: SearchIndex,
39 pub docs_count: usize,
40 pub skills_count: usize,
41 pub warnings: Vec<String>,
42}
43
44pub fn build_registry(content_dir: &Path, opts: &BuildOptions) -> Result<BuildResult> {
49 if !content_dir.exists() {
50 return Err(Error::ContentDirNotFound(content_dir.to_path_buf()));
51 }
52
53 let mut all_docs: Vec<DocEntry> = Vec::new();
54 let mut all_skills: Vec<SkillEntry> = Vec::new();
55 let mut all_warnings: Vec<String> = Vec::new();
56 let mut all_errors: Vec<String> = Vec::new();
57
58 let mut author_dirs: Vec<(String, std::path::PathBuf)> = Vec::new();
60 for entry in fs::read_dir(content_dir)? {
61 let entry = entry?;
62 if !entry.file_type()?.is_dir() {
63 continue;
64 }
65 let name = entry.file_name().to_string_lossy().to_string();
66 if name == "dist" || name.starts_with('.') {
67 continue;
68 }
69 author_dirs.push((name, entry.path()));
70 }
71
72 for (author_name, author_dir) in &author_dirs {
73 let author_registry = author_dir.join("registry.json");
74
75 if author_registry.exists() {
76 match load_author_registry(author_dir, author_name) {
77 Ok((docs, skills)) => {
78 all_docs.extend(docs);
79 all_skills.extend(skills);
80 }
81 Err(e) => {
82 all_errors.push(format!("{}/registry.json: {}", author_name, e));
83 }
84 }
85 } else {
86 let result = discover_author(author_dir, author_name, content_dir);
87 all_docs.extend(result.docs);
88 all_skills.extend(result.skills);
89 all_warnings.extend(result.warnings);
90 all_errors.extend(result.errors);
91 }
92 }
93
94 let mut seen = HashSet::with_capacity(all_docs.len() + all_skills.len());
96 for doc in &all_docs {
97 if !seen.insert(&doc.id) {
98 all_errors.push(format!("Duplicate doc id '{}'", doc.id));
99 }
100 }
101 for skill in &all_skills {
102 if !seen.insert(&skill.id) {
103 all_errors.push(format!("Duplicate skill id '{}'", skill.id));
104 }
105 }
106
107 if !all_errors.is_empty() {
108 return Err(Error::BuildErrors(all_errors.join("\n")));
109 }
110
111 let entries: Vec<Entry> = all_docs
113 .iter()
114 .map(Entry::Doc)
115 .chain(all_skills.iter().map(Entry::Skill))
116 .collect();
117 let search_index = bm25::build_index(&entries);
118
119 let docs_count = all_docs.len();
120 let skills_count = all_skills.len();
121 let generated = now_iso8601();
122
123 let registry = Registry {
124 version: "1.0.0".to_string(),
125 generated,
126 docs: all_docs,
127 skills: all_skills,
128 base_url: opts.base_url.clone(),
129 };
130
131 Ok(BuildResult {
132 docs_count,
133 skills_count,
134 warnings: all_warnings,
135 registry,
136 search_index,
137 })
138}
139
/// File name, relative to the output directory, of the build manifest that
/// maps each copied file's relative path to its SHA-256 hex digest.
const BUILD_MANIFEST_NAME: &str = ".build-manifest.json";
142
143fn sha256_file(path: &Path) -> std::io::Result<String> {
145 let data = fs::read(path)?;
146 let hash = Sha256::digest(&data);
147 Ok(format!("{:x}", hash))
148}
149
150fn load_build_manifest(output_dir: &Path) -> HashMap<String, String> {
153 let manifest_path = output_dir.join(BUILD_MANIFEST_NAME);
154 if let Ok(data) = fs::read_to_string(&manifest_path) {
155 serde_json::from_str(&data).unwrap_or_default()
156 } else {
157 HashMap::new()
158 }
159}
160
161fn save_build_manifest(output_dir: &Path, manifest: &HashMap<String, String>) -> Result<()> {
163 use std::io::BufWriter;
164 let file = fs::File::create(output_dir.join(BUILD_MANIFEST_NAME))?;
165 let writer = BufWriter::new(file);
166 serde_json::to_writer_pretty(writer, manifest)?;
167 Ok(())
168}
169
170pub fn write_build_output(
172 content_dir: &Path,
173 output_dir: &Path,
174 result: &BuildResult,
175) -> Result<()> {
176 write_build_output_with_opts(content_dir, output_dir, result, &BuildOptions::default())
177}
178
179pub fn write_build_output_with_opts(
181 content_dir: &Path,
182 output_dir: &Path,
183 result: &BuildResult,
184 opts: &BuildOptions,
185) -> Result<()> {
186 use std::io::BufWriter;
187
188 fs::create_dir_all(output_dir)?;
189
190 let file = fs::File::create(output_dir.join("registry.json"))?;
192 let writer = BufWriter::new(file);
193 serde_json::to_writer_pretty(writer, &result.registry)?;
194
195 let file = fs::File::create(output_dir.join("search-index.json"))?;
197 let writer = BufWriter::new(file);
198 serde_json::to_writer(writer, &result.search_index)?;
199
200 let bin_data = bincode::serialize(&result.search_index)
202 .map_err(|e| Error::BuildErrors(format!("bincode serialization failed: {}", e)))?;
203 fs::write(output_dir.join("search-index.bin"), &bin_data)?;
204
205 let old_manifest = if opts.incremental {
207 load_build_manifest(output_dir)
208 } else {
209 HashMap::new()
210 };
211 let mut new_manifest: HashMap<String, String> = HashMap::new();
212
213 let mut dirs_to_create = Vec::new();
217 let mut files_to_copy: Vec<(std::path::PathBuf, std::path::PathBuf, String)> = Vec::new();
218
219 for entry in walkdir::WalkDir::new(content_dir)
220 .min_depth(1)
221 .into_iter()
222 .filter_entry(|e| {
223 if e.depth() == 1 && e.file_type().is_dir() {
225 let name = e.file_name().to_string_lossy();
226 return name != "dist" && !name.starts_with('.');
227 }
228 true
229 })
230 .filter_map(|e| e.ok())
231 {
232 if entry.file_type().is_file() && entry.file_name() == "registry.json" && entry.depth() == 2
234 {
235 continue;
236 }
237
238 let rel = entry.path().strip_prefix(content_dir).unwrap();
239 let rel_str = rel.to_string_lossy().to_string();
240 let dest = output_dir.join(rel);
241
242 if entry.file_type().is_dir() {
243 dirs_to_create.push(dest);
244 } else {
245 files_to_copy.push((entry.into_path(), dest, rel_str));
246 }
247 }
248
249 for dir in &dirs_to_create {
251 fs::create_dir_all(dir)?;
252 }
253
254 if opts.incremental {
255 let copy_results: Vec<std::result::Result<(String, String), Error>> = files_to_copy
257 .par_iter()
258 .map(|(src, dest, rel_str)| {
259 let hash = sha256_file(src).map_err(|e| {
260 Error::BuildErrors(format!("hash failed for {}: {}", rel_str, e))
261 })?;
262
263 if old_manifest.get(rel_str).map(|h| h.as_str()) == Some(hash.as_str()) {
265 return Ok((rel_str.clone(), hash));
266 }
267
268 fs::copy(src, dest).map_err(|e| {
269 Error::BuildErrors(format!("copy failed for {}: {}", rel_str, e))
270 })?;
271 Ok((rel_str.clone(), hash))
272 })
273 .collect();
274
275 for res in copy_results {
276 let (rel_str, hash) = res?;
277 new_manifest.insert(rel_str, hash);
278 }
279
280 save_build_manifest(output_dir, &new_manifest)?;
282 } else {
283 let copy_results: Vec<std::result::Result<(), Error>> = files_to_copy
285 .par_iter()
286 .map(|(src, dest, rel_str)| {
287 fs::copy(src, dest).map_err(|e| {
288 Error::BuildErrors(format!("copy failed for {}: {}", rel_str, e))
289 })?;
290 Ok(())
291 })
292 .collect();
293
294 for res in copy_results {
295 res?;
296 }
297 }
298
299 Ok(())
300}
301
302fn now_iso8601() -> String {
304 let now = std::time::SystemTime::now()
305 .duration_since(std::time::UNIX_EPOCH)
306 .unwrap_or_default()
307 .as_secs();
308
309 let secs_per_day = 86400u64;
312 let days = now / secs_per_day;
313 let time_of_day = now % secs_per_day;
314
315 let hours = time_of_day / 3600;
316 let minutes = (time_of_day % 3600) / 60;
317 let seconds = time_of_day % 60;
318
319 let (year, month, day) = days_to_date(days);
321
322 format!(
323 "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.000Z",
324 year, month, day, hours, minutes, seconds
325 )
326}
327
/// Converts a count of days since 1970-01-01 into a `(year, month, day)` triple
/// in the proleptic Gregorian calendar.
///
/// The computation works on a calendar whose year starts on 1 March, so that the
/// leap day falls at the end of the year, and shifts back to January-based
/// months at the end.
pub fn days_to_date(days: u64) -> (u64, u64, u64) {
    // Days from 0000-03-01 to 1970-01-01.
    const EPOCH_SHIFT: u64 = 719468;
    // Days in one 400-year Gregorian cycle.
    const DAYS_PER_ERA: u64 = 146097;

    let shifted = days + EPOCH_SHIFT;
    let era = shifted / DAYS_PER_ERA;
    let day_of_era = shifted % DAYS_PER_ERA;

    // Remove the leap days accumulated within the era before dividing by 365.
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March, ..., 11 = February).
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;

    // January and February belong to the following civil year.
    let (month, year_carry) = if month_index < 10 {
        (month_index + 3, 0)
    } else {
        (month_index - 9, 1)
    };

    (year_of_era + era * 400 + year_carry, month, day)
}