1use super::*;
2
3pub fn write_doc_set(out_dir: &Path, docs: &[(String, String)]) -> anyhow::Result<()> {
4 std::fs::create_dir_all(out_dir)?;
5 for (relative_path, content) in docs {
6 write_doc(out_dir, relative_path, content)?;
7 }
8 Ok(())
9}
10
11pub fn write_incremental_doc_set(
12 project_root: &Path,
13 out_dir: &Path,
14 docs: &[(String, String)],
15) -> anyhow::Result<Vec<String>> {
16 let docs = docs
17 .iter()
18 .map(|(path, content)| BuiltDoc::healthy(path.clone(), content.clone()))
19 .collect::<Vec<_>>();
20 write_incremental_doc_set_with_snapshot(
21 project_root,
22 out_dir,
23 &docs,
24 None,
25 "off",
26 DocPruneScope::unscoped(),
27 )
28}
29
30pub(crate) fn write_incremental_doc_set_with_snapshot(
31 project_root: &Path,
32 out_dir: &Path,
33 docs: &[BuiltDoc],
34 index_snapshot: Option<CodewikiIndexSnapshot>,
35 ai_mode: &str,
36 prune_scope: DocPruneScope,
37) -> anyhow::Result<Vec<String>> {
38 let mut sink = DocSink::open_with_prune_scope(project_root, out_dir, ai_mode, prune_scope)?;
39 for doc in docs {
40 sink.persist(doc)?;
41 }
42 sink.finish(index_snapshot)
43}
44
45#[derive(Clone, Debug, Default)]
46pub(crate) struct DocPruneScope {
47 scopes: Vec<String>,
48}
49
50impl DocPruneScope {
51 pub(crate) fn unscoped() -> Self {
52 Self { scopes: Vec::new() }
53 }
54
55 pub(crate) fn from_scopes(scopes: &[String]) -> Self {
56 if scopes.is_empty() || scopes.iter().any(|scope| scope.is_empty()) {
57 Self::unscoped()
58 } else {
59 Self {
60 scopes: scopes.to_vec(),
61 }
62 }
63 }
64
65 pub(crate) fn is_unscoped(&self) -> bool {
66 self.scopes.is_empty()
67 }
68
69 pub(crate) fn includes_file(&self, file: &str) -> bool {
70 self.is_unscoped() || in_scope(file, &self.scopes)
71 }
72
73 pub(crate) fn includes_module(&self, module: &str) -> bool {
74 self.is_unscoped() || in_scope(module, &self.scopes)
75 }
76
77 pub(crate) fn includes_doc(&self, doc_path: &str) -> bool {
78 if self.is_unscoped() {
79 return true;
80 }
81 if let Some(file) = scoped_file_doc(doc_path) {
82 return self.includes_file(file);
83 }
84 if let Some(module) = scoped_module_doc(doc_path) {
85 return self.includes_module(module);
86 }
87 false
88 }
89
90 fn should_prune(&self, doc_path: &str) -> bool {
91 self.includes_doc(doc_path)
92 }
93}
94
/// Incremental writer for generated documents under a single output directory.
///
/// The sink compares each incoming document against the metadata recorded by
/// the previous run and only rewrites documents that are considered changed.
pub(crate) struct DocSink<'a> {
    // Root used to resolve source and neighbor files when hashing them.
    project_root: &'a Path,
    // Directory that receives the documents and the metadata file.
    out_dir: &'a Path,
    // AI mode of this run; a document recorded under a different mode is not reused.
    ai_mode: String,
    // Per-document metadata as loaded when the sink was opened.
    previous_docs: BTreeMap<String, CodewikiDocMeta>,
    // Metadata being assembled for this run; starts as a copy of the loaded metadata.
    next_docs: BTreeMap<String, CodewikiDocMeta>,
    // Document paths passed to `persist` during this run.
    seen: BTreeSet<String>,
    // Document paths actually written during this run.
    generated_docs: Vec<String>,
    // Index snapshot loaded from the previous metadata, if any.
    previous_snapshot: Option<CodewikiIndexSnapshot>,
    // Limits which unseen documents `finish` may delete.
    prune_scope: DocPruneScope,
    // Document paths written this run whose `BuiltDoc` was flagged as degraded.
    degraded_docs: Vec<String>,
    // Optional set of file paths; a document none of whose source/neighbor files
    // appear in it can be reused. Presumably the files changed since some
    // baseline — confirm with the caller that supplies it.
    since: Option<BTreeSet<String>>,
}
121
impl<'a> DocSink<'a> {
    /// Test-only convenience: opens a sink with an unscoped prune scope.
    #[cfg(test)]
    pub(crate) fn open(
        project_root: &'a Path,
        out_dir: &'a Path,
        ai_mode: &str,
    ) -> anyhow::Result<Self> {
        Self::open_with_prune_scope(project_root, out_dir, ai_mode, DocPruneScope::unscoped())
    }

    /// Opens a sink for `out_dir`, creating the directory if needed and
    /// loading the metadata left by the previous run.
    ///
    /// # Errors
    /// Fails when the directory cannot be created or the existing metadata
    /// cannot be read or parsed.
    pub(crate) fn open_with_prune_scope(
        project_root: &'a Path,
        out_dir: &'a Path,
        ai_mode: &str,
        prune_scope: DocPruneScope,
    ) -> anyhow::Result<Self> {
        std::fs::create_dir_all(out_dir)?;
        let previous = read_codewiki_meta(out_dir)?;
        Ok(Self {
            project_root,
            out_dir,
            ai_mode: ai_mode.to_string(),
            previous_docs: previous.docs.clone(),
            // Start from the previous entries so documents not touched this
            // run keep their metadata unless `finish` prunes them.
            next_docs: previous.docs,
            seen: BTreeSet::new(),
            generated_docs: Vec::new(),
            previous_snapshot: previous.index_snapshot,
            prune_scope,
            degraded_docs: Vec::new(),
            since: None,
        })
    }

    /// Builder-style setter for the optional `since` file set consulted by `persist`.
    pub(crate) fn with_since(mut self, since: Option<BTreeSet<String>>) -> Self {
        self.since = since;
        self
    }

    /// Records `doc`, writing it to disk only when it is considered changed.
    ///
    /// Returns `Ok(true)` when the document was written and `Ok(false)` when
    /// the previously recorded metadata was reused. The metadata file is
    /// flushed after every call.
    ///
    /// # Errors
    /// Fails on an unsafe document path, on hashing errors for the document's
    /// source files, or on any write failure.
    pub(crate) fn persist(&mut self, doc: &BuiltDoc) -> anyhow::Result<bool> {
        let target = safe_doc_path(self.out_dir, &doc.path)?;
        let previous_meta = self.previous_docs.get(&doc.path);
        // Fast path: with a `since` set and recorded metadata, reuse the
        // previous entry without hashing anything, provided the document has
        // no invalidation key, its file still exists, the recorded entry is
        // healthy and matches this run's AI mode and render version, it has
        // recorded sources, a required summary is present, and none of the
        // recorded source or neighbor files appears in `since`.
        if let (Some(since), Some(meta)) = (self.since.as_ref(), previous_meta)
            && doc.invalidation_key.is_none()
            && target.exists()
            && !meta.degraded
            && meta.ai_mode == self.ai_mode
            && meta.render_version == CODEWIKI_RENDER_VERSION
            && !meta.source_hashes.is_empty()
            && (doc.summary.is_none() || meta.summary.is_some())
            && meta
                .source_hashes
                .keys()
                .chain(meta.neighbor_hashes.keys())
                .all(|file| !since.contains(file))
        {
            self.next_docs.insert(doc.path.clone(), meta.clone());
            self.seen.insert(doc.path.clone());
            self.flush()?;
            return Ok(false);
        }

        let source_hashes = source_hashes_for_doc(self.project_root, &doc.content)?;
        let neighbor_hashes = neighbor_hashes_for_doc(self.project_root, &doc.neighbors)?;
        // Hash-based comparison. With an invalidation key the recorded key
        // must match (plus the hashes when the document requires sources);
        // without one the source and neighbor hashes must match exactly and
        // the source set must be non-empty.
        let unchanged = target.exists()
            && previous_meta.is_some_and(|meta| {
                !meta.degraded
                    && meta.ai_mode == self.ai_mode
                    && meta.render_version == CODEWIKI_RENDER_VERSION
                    && match &doc.invalidation_key {
                        Some(key) => {
                            meta.invalidation_key.as_deref() == Some(key.as_str())
                                && (!doc.invalidation_key_requires_sources
                                    || (!source_hashes.is_empty()
                                        && meta.source_hashes == source_hashes
                                        && meta.neighbor_hashes == neighbor_hashes))
                        }
                        None => {
                            !source_hashes.is_empty()
                                && meta.source_hashes == source_hashes
                                && meta.neighbor_hashes == neighbor_hashes
                                && (doc.summary.is_none() || meta.summary.is_some())
                        }
                    }
            });
        // `since`-based comparison: only the *sets* of source and neighbor
        // files must match the recorded ones (hash values are not compared),
        // and none of the current files may appear in `since`.
        let since_unchanged = doc.invalidation_key.is_none()
            && !source_hashes.is_empty()
            && target.exists()
            && previous_meta.is_some_and(|meta| {
                !meta.degraded
                    && meta.ai_mode == self.ai_mode
                    && meta.render_version == CODEWIKI_RENDER_VERSION
                    && source_hash_key_sets_match(&meta.source_hashes, &source_hashes)
                    && source_hash_key_sets_match(&meta.neighbor_hashes, &neighbor_hashes)
                    && (doc.summary.is_none() || meta.summary.is_some())
            })
            && self.since.as_ref().is_some_and(|since| {
                source_hashes
                    .keys()
                    .chain(neighbor_hashes.keys())
                    .all(|file| !since.contains(file))
            });
        let unchanged = unchanged || since_unchanged;

        let entry = if unchanged {
            // Both "unchanged" paths require `previous_meta` to be present,
            // so the default is never actually used here.
            previous_meta.cloned().unwrap_or_default()
        } else {
            write_doc(self.out_dir, &doc.path, &doc.content)?;
            self.generated_docs.push(doc.path.clone());
            if doc.degraded {
                self.degraded_docs.push(doc.path.clone());
            }
            CodewikiDocMeta {
                source_hashes,
                degraded: doc.degraded,
                // A degraded document never records a summary.
                summary: if doc.degraded {
                    None
                } else {
                    doc.summary.clone()
                },
                ai_mode: self.ai_mode.clone(),
                render_version: CODEWIKI_RENDER_VERSION,
                neighbor_hashes,
                invalidation_key: doc.invalidation_key.clone(),
            }
        };
        self.next_docs.insert(doc.path.clone(), entry);
        self.seen.insert(doc.path.clone());
        self.flush()?;
        Ok(!unchanged)
    }

    /// Paths of the documents written during this run that were flagged as degraded.
    pub(crate) fn degraded_docs(&self) -> &[String] {
        &self.degraded_docs
    }

    /// Writes the metadata file from the current in-memory state, keeping the
    /// index snapshot loaded from the previous run.
    fn flush(&self) -> anyhow::Result<()> {
        let meta = CodewikiMeta {
            docs: self.next_docs.clone(),
            generated_docs: self.generated_docs.clone(),
            index_snapshot: self.previous_snapshot.clone(),
            ai_mode: self.ai_mode.clone(),
        };
        write_codewiki_meta(self.out_dir, &meta)
    }

    /// Prunes stale documents, writes the final metadata, and returns the
    /// paths written during this run.
    ///
    /// A document is stale when it was not passed to `persist` during this
    /// run and the prune scope covers it. Candidates come from both the
    /// recorded metadata and the markdown pages found on disk under `code/`.
    /// The final metadata stores `index_snapshot` when given, otherwise the
    /// snapshot loaded from the previous run.
    pub(crate) fn finish(
        mut self,
        index_snapshot: Option<CodewikiIndexSnapshot>,
    ) -> anyhow::Result<Vec<String>> {
        let mut stale = self
            .next_docs
            .keys()
            .filter(|key| !self.seen.contains(*key) && self.prune_scope.should_prune(key))
            .cloned()
            .collect::<BTreeSet<_>>();
        // Also catch pages that exist on disk but have no metadata entry.
        for doc_path in collect_generated_doc_pages(self.out_dir)? {
            if !self.seen.contains(&doc_path) && self.prune_scope.should_prune(&doc_path) {
                stale.insert(doc_path);
            }
        }
        for stale_path in stale {
            let target = safe_doc_path(self.out_dir, &stale_path)?;
            reject_symlinked_doc_path(self.out_dir, &target)?;
            match std::fs::remove_file(&target) {
                Ok(()) => prune_empty_doc_dirs(self.out_dir, &target)?,
                // Already gone: only the metadata entry needs dropping.
                Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
                Err(err) => return Err(err.into()),
            }
            self.next_docs.remove(&stale_path);
        }
        let meta = CodewikiMeta {
            docs: self.next_docs,
            generated_docs: self.generated_docs.clone(),
            index_snapshot: index_snapshot.or(self.previous_snapshot),
            ai_mode: self.ai_mode,
        };
        write_codewiki_meta(self.out_dir, &meta)?;
        Ok(self.generated_docs)
    }
}
356
357fn collect_generated_doc_pages(out_dir: &Path) -> anyhow::Result<Vec<String>> {
365 let code_root = out_dir.join("code");
366 if !code_root.is_dir() {
367 return Ok(Vec::new());
368 }
369 let mut pages = Vec::new();
370 let mut stack = vec![code_root];
371 while let Some(dir) = stack.pop() {
372 for entry in std::fs::read_dir(&dir)? {
373 let entry = entry?;
374 let file_type = entry.file_type()?;
375 if file_type.is_symlink() {
376 continue;
377 }
378 let path = entry.path();
379 if file_type.is_dir() {
380 stack.push(path);
381 } else if file_type.is_file()
382 && path.extension().is_some_and(|ext| ext == "md")
383 && let Ok(rel) = path.strip_prefix(out_dir)
384 {
385 pages.push(
386 rel.to_string_lossy()
387 .replace(std::path::MAIN_SEPARATOR, "/"),
388 );
389 }
390 }
391 }
392 Ok(pages)
393}
394
/// Extracts the source-file path from a `code/files/<path>.md` document path.
///
/// Returns `None` when the prefix or the `.md` suffix is missing.
fn scoped_file_doc(doc_path: &str) -> Option<&str> {
    let without_prefix = doc_path.strip_prefix("code/files/")?;
    without_prefix.strip_suffix(".md")
}
400
/// Extracts the module path from a `code/modules/<path>.md` document path.
///
/// Returns `None` when the prefix or the `.md` suffix is missing.
fn scoped_module_doc(doc_path: &str) -> Option<&str> {
    let without_prefix = doc_path.strip_prefix("code/modules/")?;
    without_prefix.strip_suffix(".md")
}
406
407pub(crate) fn write_doc(out_dir: &Path, relative_path: &str, content: &str) -> anyhow::Result<()> {
408 let target = safe_doc_path(out_dir, relative_path)?;
409 reject_symlinked_doc_path(out_dir, &target)?;
410 if let Some(parent) = target.parent() {
411 std::fs::create_dir_all(parent)?;
412 }
413 std::fs::write(target, content)?;
414 Ok(())
415}
416
417pub(crate) fn reject_symlinked_doc_path(out_dir: &Path, target: &Path) -> anyhow::Result<()> {
418 let relative = target.strip_prefix(out_dir)?;
419 let mut current = out_dir.to_path_buf();
420 for component in relative.components() {
421 current.push(component);
422 match std::fs::symlink_metadata(¤t) {
423 Ok(metadata) if metadata.file_type().is_symlink() => {
424 anyhow::bail!(
425 "refusing to follow symlinked codewiki path: {}",
426 current.display()
427 );
428 }
429 Ok(_) => {}
430 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
431 Err(err) => return Err(err.into()),
432 }
433 }
434 Ok(())
435}
436
437pub(crate) fn prune_empty_doc_dirs(out_dir: &Path, target: &Path) -> anyhow::Result<()> {
438 let mut current = target.parent();
439 while let Some(dir) = current {
440 if dir == out_dir {
441 break;
442 }
443 match std::fs::remove_dir(dir) {
444 Ok(()) => current = dir.parent(),
445 Err(err)
446 if matches!(
447 err.kind(),
448 std::io::ErrorKind::NotFound | std::io::ErrorKind::DirectoryNotEmpty
449 ) =>
450 {
451 break;
452 }
453 Err(err) => return Err(err.into()),
454 }
455 }
456 Ok(())
457}
458
459pub(crate) fn read_codewiki_meta(out_dir: &Path) -> anyhow::Result<CodewikiMeta> {
460 let path = safe_doc_path(out_dir, CODEWIKI_META_PATH)?;
461 let mut meta: CodewikiMeta = match std::fs::read_to_string(&path) {
462 Ok(raw) => serde_json::from_str(&raw)?,
463 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
464 return Ok(CodewikiMeta::default());
465 }
466 Err(err) => return Err(err.into()),
467 };
468 let run_mode = meta.ai_mode.clone();
471 for doc in meta.docs.values_mut() {
472 if doc.ai_mode.is_empty() {
473 doc.ai_mode = run_mode.clone();
474 }
475 }
476 Ok(meta)
477}
478
479pub(crate) fn write_codewiki_meta(out_dir: &Path, meta: &CodewikiMeta) -> anyhow::Result<()> {
480 let content = serde_json::to_string_pretty(meta)?;
481 write_doc(out_dir, CODEWIKI_META_PATH, &(content + "\n"))
482}
483
484pub(crate) fn read_ownership_meta(out_dir: &Path) -> anyhow::Result<OwnershipMeta> {
485 let path = safe_doc_path(out_dir, OWNERSHIP_META_PATH)?;
486 match std::fs::read_to_string(&path) {
487 Ok(raw) => Ok(serde_json::from_str::<OwnershipMeta>(&raw)?),
488 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(OwnershipMeta::default()),
489 Err(err) => Err(err.into()),
490 }
491}
492
493pub(crate) fn write_ownership_meta(out_dir: &Path, meta: &OwnershipMeta) -> anyhow::Result<()> {
494 let content = serde_json::to_string_pretty(meta)?;
495 write_doc(out_dir, OWNERSHIP_META_PATH, &(content + "\n"))
496}
497
498pub(crate) fn source_hashes_for_doc(
499 project_root: &Path,
500 content: &str,
501) -> anyhow::Result<BTreeMap<String, String>> {
502 let mut hashes = BTreeMap::new();
503 let canonical_root = project_root
504 .canonicalize()
505 .map_err(|err| anyhow::anyhow!("failed to resolve codewiki project root: {err}"))?;
506 for file in source_files_from_frontmatter(content) {
507 let source_path = project_root.join(&file);
508 let canonical_source = source_path.canonicalize().map_err(|err| {
509 anyhow::anyhow!("failed to resolve codewiki source file {file}: {err}")
510 })?;
511 if !canonical_source.starts_with(&canonical_root) {
512 anyhow::bail!("codewiki source file {file} resolves outside project root");
513 }
514 let hash = hasher::file_content_hash(&canonical_source)
515 .map_err(|err| anyhow::anyhow!("failed to hash codewiki source file {file}: {err}"))?;
516 hashes.insert(file, hash);
517 }
518 Ok(hashes)
519}
520
/// Returns `true` when both maps contain exactly the same set of file keys.
///
/// Hash values are ignored; only the keys are compared.
fn source_hash_key_sets_match(
    recorded: &BTreeMap<String, String>,
    current: &BTreeMap<String, String>,
) -> bool {
    // `BTreeMap` iterates keys in sorted order, so equal key sets produce
    // equal key sequences and vice versa.
    recorded.keys().eq(current.keys())
}
527
528pub(crate) fn neighbor_hashes_for_doc(
533 project_root: &Path,
534 neighbors: &BTreeSet<String>,
535) -> anyhow::Result<BTreeMap<String, String>> {
536 if neighbors.is_empty() {
537 return Ok(BTreeMap::new());
538 }
539 let canonical_root = project_root
540 .canonicalize()
541 .map_err(|err| anyhow::anyhow!("failed to resolve codewiki project root: {err}"))?;
542 let mut hashes = BTreeMap::new();
543 for file in neighbors {
544 let Ok(canonical_source) = project_root.join(file).canonicalize() else {
545 continue;
546 };
547 if !canonical_source.starts_with(&canonical_root) {
548 continue;
549 }
550 if let Ok(hash) = hasher::file_content_hash(&canonical_source) {
551 hashes.insert(file.clone(), hash);
552 }
553 }
554 Ok(hashes)
555}
556
557pub(crate) fn source_files_from_frontmatter(content: &str) -> BTreeSet<String> {
558 let mut files = BTreeSet::new();
559
560 let mut lines = content.lines();
561 if lines.next() != Some("---") {
562 return files;
563 }
564 let frontmatter = lines
565 .take_while(|line| *line != "---")
566 .collect::<Vec<_>>()
567 .join("\n");
568 let Ok(serde_yaml::Value::Mapping(frontmatter)) =
569 serde_yaml::from_str::<serde_yaml::Value>(&frontmatter)
570 else {
571 return files;
572 };
573
574 for key in [gobby_core::codewiki_contract::PROVENANCE_KEY] {
575 let key = serde_yaml::Value::String(key.to_string());
576 let Some(serde_yaml::Value::Sequence(sources)) = frontmatter.get(&key) else {
577 continue;
578 };
579 for source in sources {
580 let serde_yaml::Value::Mapping(source) = source else {
581 continue;
582 };
583 let file_key = serde_yaml::Value::String(
584 gobby_core::codewiki_contract::PROVENANCE_FILE_KEY.to_string(),
585 );
586 if let Some(serde_yaml::Value::String(file)) = source.get(&file_key) {
587 files.insert(file.clone());
588 }
589 }
590 }
591 files
592}
593
/// Decodes a YAML double-quoted scalar into its string value.
///
/// Surrounding whitespace is trimmed and the value must start and end with
/// `"`. Every escape sequence defined for YAML 1.2 double-quoted scalars is
/// supported, including `\ `, an escaped literal tab, `\N`, `\_`, `\L`, `\P`
/// and the `\x`, `\u`, `\U` hexadecimal forms.
///
/// Returns `None` when the quotes are missing, an escape is unknown or
/// truncated, or a hexadecimal escape is not a valid Unicode scalar value.
#[cfg(test)]
pub(crate) fn unquote_yaml_string(value: &str) -> Option<String> {
    let value = value.trim();
    let inner = value.strip_prefix('"')?.strip_suffix('"')?;
    let mut out = String::with_capacity(inner.len());
    let mut chars = inner.chars();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            out.push(ch);
            continue;
        }
        // A trailing lone backslash has nothing to escape and is rejected.
        let decoded = match chars.next()? {
            '0' => '\0',
            'a' => '\u{0007}',
            'b' => '\u{0008}',
            't' | '\t' => '\t',
            'n' => '\n',
            'v' => '\u{000b}',
            'f' => '\u{000c}',
            'r' => '\r',
            'e' => '\u{001b}',
            ' ' => ' ',
            '"' => '"',
            '/' => '/',
            '\\' => '\\',
            'N' => '\u{0085}',
            '_' => '\u{00a0}',
            'L' => '\u{2028}',
            'P' => '\u{2029}',
            'x' => decode_hex_escape(&mut chars, 2)?,
            'u' => decode_hex_escape(&mut chars, 4)?,
            'U' => decode_hex_escape(&mut chars, 8)?,
            _ => return None,
        };
        out.push(decoded);
    }
    Some(out)
}

/// Reads exactly `digits` hexadecimal digits from `chars` and converts the
/// resulting number to a `char`.
///
/// Returns `None` when the input ends early, a character is not a hex digit,
/// the value overflows `u32`, or it is not a valid Unicode scalar value.
#[cfg(test)]
fn decode_hex_escape(chars: &mut std::str::Chars<'_>, digits: usize) -> Option<char> {
    let code = (0..digits).try_fold(0_u32, |acc, _| {
        let shifted = acc.checked_mul(16)?;
        let digit = chars.next()?.to_digit(16)?;
        shifted.checked_add(digit)
    })?;
    char::from_u32(code)
}
636
637pub(crate) fn safe_doc_path(out_dir: &Path, relative_path: &str) -> anyhow::Result<PathBuf> {
638 let path = Path::new(relative_path);
639 if path.is_absolute()
640 || path
641 .components()
642 .any(|component| matches!(component, std::path::Component::ParentDir))
643 {
644 anyhow::bail!("refusing to write unsafe codewiki path: {relative_path}");
645 }
646 Ok(out_dir.join(path))
647}