1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
use anyhow::{bail, Context, Result};
use std::path::PathBuf;
use crate::utils::is_windows_reserved_name;
use super::core::{parse_frontmatter, KnowledgeManager, PageFrontmatter, PageInfo};
impl KnowledgeManager {
/// List all `.md` pages in the knowledge worktree with parsed frontmatter.
///
/// Reads only the first 4 KiB of each file to extract frontmatter,
/// avoiding full-file reads for pages with large body content (#427).
///
/// # Errors
/// Returns an error if the cache directory cannot be read or a file cannot be opened.
pub fn list_pages(&self) -> Result<Vec<PageInfo>> {
use std::io::Read;
/// Maximum bytes to read for frontmatter extraction. YAML frontmatter
/// in knowledge pages is typically <1 KiB; 4 KiB provides ample margin.
const FRONTMATTER_READ_LIMIT: usize = 4096;
let mut pages = Vec::new();
if !self.cache_dir.exists() {
return Ok(pages);
}
for entry in std::fs::read_dir(&self.cache_dir)? {
let entry = entry?;
let path = entry.path();
if path.extension().is_some_and(|e| e == "md") {
let slug = path
.file_stem()
.unwrap_or_default()
.to_string_lossy()
.to_string();
// Read only the first N bytes — enough for frontmatter.
let content = {
let mut file = std::fs::File::open(&path)?;
let mut buf = vec![0u8; FRONTMATTER_READ_LIMIT];
let n = file.read(&mut buf)?;
buf.truncate(n);
String::from_utf8_lossy(&buf).into_owned()
};
let frontmatter = parse_frontmatter(&content).unwrap_or_else(|| PageFrontmatter {
title: slug.clone(),
tags: Vec::new(),
sources: Vec::new(),
contributors: Vec::new(),
created: String::new(),
updated: String::new(),
});
pages.push(PageInfo { slug, frontmatter });
}
}
pages.sort_by(|a, b| a.slug.cmp(&b.slug));
Ok(pages)
}
/// Validate a slug and return the safe path within the cache directory.
///
/// Rejects slugs containing path separators, parent-directory traversals,
/// or characters that are unsafe for filenames.
pub(crate) fn safe_page_path(&self, slug: &str) -> Result<PathBuf> {
if slug.is_empty() {
bail!("Page slug cannot be empty");
}
if slug.contains('/') || slug.contains('\\') || slug.contains('\0') || slug.contains("..") {
bail!("Invalid page slug '{slug}': must not contain path separators or '..'");
}
if is_windows_reserved_name(slug) {
bail!("Invalid page slug '{slug}': Windows reserved filename");
}
let path = self.cache_dir.join(format!("{slug}.md"));
// Defense in depth: verify the resolved path is within cache_dir.
// Both paths must be canonicalized for a reliable starts_with check.
// If either canonicalization fails (directory does not exist yet),
// reject the path rather than silently skipping the check.
if let (Ok(canonical_cache), Some(canonical_parent)) = (
self.cache_dir.canonicalize(),
path.parent().and_then(|p| p.canonicalize().ok()),
) {
if !canonical_parent.starts_with(&canonical_cache) {
bail!("Invalid page slug '{slug}': resolves outside knowledge cache");
}
}
Ok(path)
}
/// Read a page by its filename slug (without `.md` extension).
///
/// # Errors
/// Returns an error if the slug is invalid or the page does not exist.
pub fn read_page(&self, slug: &str) -> Result<String> {
let path = self.safe_page_path(slug)?;
if !path.exists() {
bail!("Page '{slug}' not found");
}
std::fs::read_to_string(&path).context("Failed to read page")
}
/// Write or overwrite a page by its filename slug.
///
/// # Errors
/// Returns an error if the cache is not initialized or the write fails.
pub fn write_page(&self, slug: &str, content: &str) -> Result<()> {
if !self.cache_dir.exists() {
bail!("Knowledge cache not initialized. Run init_cache() first.");
}
let path = self.safe_page_path(slug)?;
std::fs::write(&path, content).context("Failed to write page")
}
/// Check if a page exists by slug.
#[must_use]
pub fn page_exists(&self, slug: &str) -> bool {
self.safe_page_path(slug).is_ok_and(|path| path.exists())
}
/// Delete a page by slug.
///
/// # Errors
/// Returns an error if the slug is invalid or the page does not exist.
pub fn delete_page(&self, slug: &str) -> Result<()> {
let path = self.safe_page_path(slug)?;
if !path.exists() {
bail!("Page '{slug}' not found");
}
std::fs::remove_file(&path).context("Failed to delete page")
}
}