1use std::collections::{HashMap, HashSet};
2use std::fs;
3use std::path::Path;
4
5use rayon::prelude::*;
6use sha2::{Digest, Sha256};
7
8use crate::error::{Error, Result};
9use crate::search::bm25;
10use crate::types::{DocEntry, Entry, Registry, SearchIndex, SkillEntry};
11
12use super::discovery::{discover_author, load_author_registry};
13
/// Options controlling how the registry is built and how build output is written.
///
/// Derives `Clone`/`PartialEq`/`Eq` so callers can copy a baseline configuration,
/// tweak single fields with struct-update syntax, and compare configurations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BuildOptions {
    /// Base URL copied verbatim into the generated registry's `base_url` field.
    pub base_url: Option<String>,
    /// Validation-only flag.
    ///
    /// NOTE(review): nothing in this module reads this flag; presumably the
    /// caller uses it to decide whether to write output at all — confirm.
    pub validate_only: bool,
    /// When `true`, `write_build_output_with_opts` hashes every content file and
    /// skips copying files whose hash matches the previously saved build manifest.
    pub incremental: bool,
}

impl Default for BuildOptions {
    /// Defaults: no base URL, validation-only off, incremental copying on.
    fn default() -> Self {
        Self {
            base_url: None,
            validate_only: false,
            incremental: true,
        }
    }
}
33
/// Outcome of a successful `build_registry` run.
#[derive(Debug)]
pub struct BuildResult {
    /// The assembled registry: all docs and skills plus version, generation
    /// timestamp and the optional base URL.
    pub registry: Registry,
    /// Search index produced by `bm25::build_index` over every doc and skill entry.
    pub search_index: SearchIndex,
    /// Number of doc entries placed in `registry.docs`.
    pub docs_count: usize,
    /// Number of skill entries placed in `registry.skills`.
    pub skills_count: usize,
    /// Non-fatal warnings collected while discovering author content.
    pub warnings: Vec<String>,
}
43
44pub fn build_registry(content_dir: &Path, opts: &BuildOptions) -> Result<BuildResult> {
49 if !content_dir.exists() {
50 return Err(Error::ContentDirNotFound(content_dir.to_path_buf()));
51 }
52
53 let mut all_docs: Vec<DocEntry> = Vec::new();
54 let mut all_skills: Vec<SkillEntry> = Vec::new();
55 let mut all_warnings: Vec<String> = Vec::new();
56 let mut all_errors: Vec<String> = Vec::new();
57
58 let mut author_dirs: Vec<(String, std::path::PathBuf)> = Vec::new();
60 for entry in fs::read_dir(content_dir)? {
61 let entry = entry?;
62 if !entry.file_type()?.is_dir() {
63 continue;
64 }
65 let name = entry.file_name().to_string_lossy().to_string();
66 if name == "dist" || name.starts_with('.') {
67 continue;
68 }
69 author_dirs.push((name, entry.path()));
70 }
71
72 for (author_name, author_dir) in &author_dirs {
73 let author_registry = author_dir.join("registry.json");
74
75 if author_registry.exists() {
76 match load_author_registry(author_dir, author_name) {
77 Ok((docs, skills)) => {
78 all_docs.extend(docs);
79 all_skills.extend(skills);
80 }
81 Err(e) => {
82 all_errors.push(format!("{}/registry.json: {}", author_name, e));
83 }
84 }
85 } else {
86 let result = discover_author(author_dir, author_name, content_dir);
87 all_docs.extend(result.docs);
88 all_skills.extend(result.skills);
89 all_warnings.extend(result.warnings);
90 all_errors.extend(result.errors);
91 }
92 }
93
94 let mut seen = HashSet::with_capacity(all_docs.len() + all_skills.len());
96 for doc in &all_docs {
97 if !seen.insert(&doc.id) {
98 all_errors.push(format!("Duplicate doc id '{}'", doc.id));
99 }
100 }
101 for skill in &all_skills {
102 if !seen.insert(&skill.id) {
103 all_errors.push(format!("Duplicate skill id '{}'", skill.id));
104 }
105 }
106
107 if !all_errors.is_empty() {
108 return Err(Error::BuildErrors(all_errors.join("\n")));
109 }
110
111 let entries: Vec<Entry> = all_docs
113 .iter()
114 .map(Entry::Doc)
115 .chain(all_skills.iter().map(Entry::Skill))
116 .collect();
117 let search_index = bm25::build_index(&entries);
118
119 let docs_count = all_docs.len();
120 let skills_count = all_skills.len();
121 let generated = now_iso8601();
122
123 let registry = Registry {
124 version: "1.0.0".to_string(),
125 generated,
126 docs: all_docs,
127 skills: all_skills,
128 base_url: opts.base_url.clone(),
129 };
130
131 Ok(BuildResult {
132 docs_count,
133 skills_count,
134 warnings: all_warnings,
135 registry,
136 search_index,
137 })
138}
139
/// File name, inside the output directory, of the JSON manifest that maps each
/// copied file's relative path to its SHA-256 hash for incremental builds.
const BUILD_MANIFEST_NAME: &str = ".build-manifest.json";
142
143fn sha256_file(path: &Path) -> std::io::Result<String> {
145 let data = fs::read(path)?;
146 let hash = Sha256::digest(&data);
147 Ok(format!("{:x}", hash))
148}
149
150fn load_build_manifest(output_dir: &Path) -> HashMap<String, String> {
153 let manifest_path = output_dir.join(BUILD_MANIFEST_NAME);
154 if let Ok(data) = fs::read_to_string(&manifest_path) {
155 serde_json::from_str(&data).unwrap_or_default()
156 } else {
157 HashMap::new()
158 }
159}
160
161fn save_build_manifest(output_dir: &Path, manifest: &HashMap<String, String>) -> Result<()> {
163 use std::io::BufWriter;
164 let file = fs::File::create(output_dir.join(BUILD_MANIFEST_NAME))?;
165 let writer = BufWriter::new(file);
166 serde_json::to_writer_pretty(writer, manifest)?;
167 Ok(())
168}
169
170pub fn write_build_output(
172 content_dir: &Path,
173 output_dir: &Path,
174 result: &BuildResult,
175) -> Result<()> {
176 write_build_output_with_opts(content_dir, output_dir, result, &BuildOptions::default())
177}
178
179pub fn write_build_output_with_opts(
181 content_dir: &Path,
182 output_dir: &Path,
183 result: &BuildResult,
184 opts: &BuildOptions,
185) -> Result<()> {
186 use std::io::BufWriter;
187
188 fs::create_dir_all(output_dir)?;
189
190 let file = fs::File::create(output_dir.join("registry.json"))?;
192 let writer = BufWriter::new(file);
193 serde_json::to_writer_pretty(writer, &result.registry)?;
194
195 let file = fs::File::create(output_dir.join("search-index.json"))?;
197 let writer = BufWriter::new(file);
198 serde_json::to_writer(writer, &result.search_index)?;
199
200 let bin_data = bincode::serialize(&result.search_index)
202 .map_err(|e| Error::BuildErrors(format!("bincode serialization failed: {}", e)))?;
203 fs::write(output_dir.join("search-index.bin"), &bin_data)?;
204
205 let index_html = generate_index_html(result);
207 fs::write(output_dir.join("index.html"), index_html)?;
208
209 let old_manifest = if opts.incremental {
211 load_build_manifest(output_dir)
212 } else {
213 HashMap::new()
214 };
215 let mut new_manifest: HashMap<String, String> = HashMap::new();
216
217 let mut dirs_to_create = Vec::new();
221 let mut files_to_copy: Vec<(std::path::PathBuf, std::path::PathBuf, String)> = Vec::new();
222
223 for entry in walkdir::WalkDir::new(content_dir)
224 .min_depth(1)
225 .into_iter()
226 .filter_entry(|e| {
227 if e.depth() == 1 && e.file_type().is_dir() {
229 let name = e.file_name().to_string_lossy();
230 return name != "dist" && !name.starts_with('.');
231 }
232 true
233 })
234 .filter_map(|e| e.ok())
235 {
236 if entry.file_type().is_file() && entry.file_name() == "registry.json" && entry.depth() == 2
238 {
239 continue;
240 }
241
242 let rel = entry.path().strip_prefix(content_dir).unwrap();
243 let rel_str = rel.to_string_lossy().replace('\\', "/");
244 let dest = output_dir.join(rel);
245
246 if entry.file_type().is_dir() {
247 dirs_to_create.push(dest);
248 } else {
249 files_to_copy.push((entry.into_path(), dest, rel_str));
250 }
251 }
252
253 for dir in &dirs_to_create {
255 fs::create_dir_all(dir)?;
256 }
257
258 if opts.incremental {
259 let copy_results: Vec<std::result::Result<(String, String), Error>> = files_to_copy
261 .par_iter()
262 .map(|(src, dest, rel_str)| {
263 let hash = sha256_file(src).map_err(|e| {
264 Error::BuildErrors(format!("hash failed for {}: {}", rel_str, e))
265 })?;
266
267 if old_manifest.get(rel_str).map(|h| h.as_str()) == Some(hash.as_str()) {
269 return Ok((rel_str.clone(), hash));
270 }
271
272 fs::copy(src, dest).map_err(|e| {
273 Error::BuildErrors(format!("copy failed for {}: {}", rel_str, e))
274 })?;
275 Ok((rel_str.clone(), hash))
276 })
277 .collect();
278
279 for res in copy_results {
280 let (rel_str, hash) = res?;
281 new_manifest.insert(rel_str, hash);
282 }
283
284 save_build_manifest(output_dir, &new_manifest)?;
286 } else {
287 let copy_results: Vec<std::result::Result<(), Error>> = files_to_copy
289 .par_iter()
290 .map(|(src, dest, rel_str)| {
291 fs::copy(src, dest).map_err(|e| {
292 Error::BuildErrors(format!("copy failed for {}: {}", rel_str, e))
293 })?;
294 Ok(())
295 })
296 .collect();
297
298 for res in copy_results {
299 res?;
300 }
301 }
302
303 Ok(())
304}
305
/// HTML skeleton of the generated catalog page, embedded at compile time.
/// `generate_index_html` substitutes the `{style}`, `{script}`, `{docs}`,
/// `{skills}`, `{lang_count}`, `{generated}` and `{catalog}` placeholders.
const INDEX_TEMPLATE: &str = include_str!("static/index.html");
/// Stylesheet text substituted for the `{style}` placeholder.
const INDEX_STYLE: &str = include_str!("static/style.css");
/// Script text substituted for the `{script}` placeholder.
const INDEX_SCRIPT: &str = include_str!("static/script.js");
310
311fn generate_index_html(result: &BuildResult) -> String {
314 let mut entries = Vec::new();
316 for doc in &result.registry.docs {
317 let langs: Vec<serde_json::Value> = doc
318 .languages
319 .iter()
320 .map(|l| {
321 let versions: Vec<serde_json::Value> = l
322 .versions
323 .iter()
324 .map(|v| {
325 serde_json::json!({
326 "version": v.version,
327 "path": v.path,
328 })
329 })
330 .collect();
331 serde_json::json!({
332 "language": l.language,
333 "recommended": l.recommended_version,
334 "versions": versions,
335 })
336 })
337 .collect();
338 let lang_names: Vec<&str> = doc.languages.iter().map(|l| l.language.as_str()).collect();
339 entries.push(serde_json::json!({
340 "id": doc.id,
341 "name": doc.name,
342 "description": doc.description,
343 "source": doc.source,
344 "tags": doc.tags,
345 "type": "doc",
346 "langNames": lang_names,
347 "langs": langs,
348 }));
349 }
350 for skill in &result.registry.skills {
351 entries.push(serde_json::json!({
352 "id": skill.id,
353 "name": skill.name,
354 "description": skill.description,
355 "source": skill.source,
356 "tags": skill.tags,
357 "type": "skill",
358 "langNames": [],
359 "langs": [],
360 "path": skill.path,
361 }));
362 }
363 let catalog_json = serde_json::to_string(&entries).unwrap_or_else(|_| "[]".to_string());
364
365 let docs_count = result.docs_count;
366 let skills_count = result.skills_count;
367 let generated = &result.registry.generated;
368
369 let mut languages: Vec<&str> = result
370 .registry
371 .docs
372 .iter()
373 .flat_map(|d| d.languages.iter().map(|l| l.language.as_str()))
374 .collect();
375 languages.sort();
376 languages.dedup();
377
378 INDEX_TEMPLATE
379 .replace("{style}", INDEX_STYLE)
380 .replace("{script}", INDEX_SCRIPT)
381 .replace("{docs}", &docs_count.to_string())
382 .replace("{skills}", &skills_count.to_string())
383 .replace("{lang_count}", &languages.len().to_string())
384 .replace("{generated}", generated)
385 .replace("{catalog}", &catalog_json)
386}
387
388fn now_iso8601() -> String {
390 crate::util::now_iso8601()
391}
392
393pub fn days_to_date(days: u64) -> (u64, u64, u64) {
396 crate::util::days_to_date(days)
397}