Skip to main content

llm_wiki_lib/
wiki.rs

1//! Core Wiki struct — manages wiki directory and provides pipeline entry points.
2
3use std::path::PathBuf;
4use walkdir::WalkDir;
5
6use crate::config::Config;
7use crate::ingest::{self, IngestResult};
8use crate::lint::{self, LintResult};
9use crate::llm::LlmClient;
10use crate::query::{self, QueryResult};
11use crate::rebuild::{self, RebuildResult};
12
13/// Wiki — main entry point for the LLM Wiki SDK.
14///
15/// ```no_run
16/// # async fn run() -> anyhow::Result<()> {
17/// use llm_wiki_lib::Wiki;
18///
19/// let wiki = Wiki::new(Default::default()).await?;
20/// let status = wiki.status()?;
21/// println!("Wiki has {} entries", status.file_count);
22/// # Ok(())
23/// # }
24/// ```
25#[derive(Clone)]
26pub struct Wiki {
27    config: Config,
28    llm: LlmClient,
29}
30
31impl Wiki {
32    /// Create a new Wiki instance, loading config and initialising the LLM client.
33    pub async fn new(config: Config) -> anyhow::Result<Self> {
34        let llm_config = config.resolved_llm()?;
35        let llm = LlmClient::new(llm_config);
36
37        // Ensure wiki directory exists
38        config.wiki_dir()?;
39
40        Ok(Self { config, llm })
41    }
42
43    /// Create a Wiki instance without LLM (for local-only operations like status).
44    pub fn new_local(config: Config) -> anyhow::Result<Self> {
45        // Use a placeholder LLM config — won't be used for local-only commands
46        let llm = LlmClient::new(crate::config::LlmConfig {
47            provider: crate::config::LlmProvider::Anthropic,
48            model: String::new(),
49            api_key: String::new(),
50            base_url: None,
51        });
52
53        // Ensure wiki directory exists
54        config.wiki_dir()?;
55
56        Ok(Self { config, llm })
57    }
58
59    /// Create from environment variables only (no config file).
60    pub async fn from_env() -> anyhow::Result<Self> {
61        Self::new(Config::default()).await
62    }
63
64    // ── Pipelines ──────────────────────────────────────────────
65
66    /// Ingest a source file into the wiki.
67    ///
68    /// Reads the file, extracts text, calls the LLM to generate wiki content,
69    /// and writes the result to `wiki/`.
70    ///
71    /// `source_path` is relative to the workspace or absolute.
72    pub async fn ingest(&self, source_path: &str) -> anyhow::Result<IngestResult> {
73        ingest::run(self, source_path).await
74    }
75
76    /// Query the wiki with a natural-language question.
77    ///
78    /// Reads relevant wiki files and asks the LLM to synthesise an answer.
79    pub async fn query(&self, question: &str) -> anyhow::Result<QueryResult> {
80        query::run(self, question).await
81    }
82
83    /// Lint the wiki for health issues (deadlinks, duplicates, stale content).
84    pub async fn lint(&self) -> anyhow::Result<LintResult> {
85        lint::run(self).await
86    }
87
88    /// Build the wikilink knowledge graph with backlinks.
89    pub fn graph(&self) -> anyhow::Result<crate::graph::GraphResult> {
90        crate::graph::run(self)
91    }
92
93    /// Rebuild wiki entries for source files modified since `since`.
94    ///
95    /// Only re-ingests source files whose mtime is newer than `since`.
96    /// If `dry_run` is true, returns the list of files that would be rebuilt without rebuilding them.
97    pub async fn rebuild_since(
98        &self,
99        since: chrono::DateTime<chrono::Utc>,
100        dry_run: bool,
101    ) -> anyhow::Result<RebuildResult> {
102        rebuild::run(self, since, dry_run).await
103    }
104
105    /// Initialise the wiki directory structure and a default SYSTEM.md.
106    ///
107    /// Creates the wiki and workspace directories, plus a default `SYSTEM.md`
108    /// if it doesn't exist. Returns the paths created.
109    pub fn init(&self) -> anyhow::Result<crate::init::InitResult> {
110        crate::init::run(self)
111    }
112
113    // ── Status ─────────────────────────────────────────────────
114
115    /// Return statistics about the wiki directory.
116    pub fn status(&self) -> anyhow::Result<Status> {
117        let wiki_dir = &self.config.paths.wiki;
118
119        if !wiki_dir.exists() {
120            return Ok(Status {
121                wiki_dir: wiki_dir.clone(),
122                file_count: 0,
123                total_bytes: 0,
124                last_modified: None,
125                issues: vec![],
126            });
127        }
128
129        let mut file_count = 0u32;
130        let mut total_bytes = 0u64;
131        let mut last_modified: Option<chrono::DateTime<chrono::Utc>> = None;
132
133        for entry in WalkDir::new(wiki_dir)
134            .into_iter()
135            .filter_map(|e| e.ok())
136            .filter(|e| e.path().extension().is_some_and(|ext| ext == "md"))
137        {
138            if let Ok(meta) = entry.metadata() {
139                file_count += 1;
140                total_bytes += meta.len();
141                if let Ok(modified) = meta.modified() {
142                    let dt: chrono::DateTime<chrono::Utc> = modified.into();
143                    last_modified = last_modified
144                        .map(|l| if dt > l { dt } else { l })
145                        .or(Some(dt));
146                }
147            }
148        }
149
150        Ok(Status {
151            wiki_dir: wiki_dir.clone(),
152            file_count,
153            total_bytes,
154            last_modified,
155            issues: vec![],
156        })
157    }
158
159    // ── Internal accessors ─────────────────────────────────────
160
161    pub(crate) fn config(&self) -> &Config {
162        &self.config
163    }
164
165    pub(crate) fn llm(&self) -> &LlmClient {
166        &self.llm
167    }
168
169    /// Resolve a source file path relative to the workspace.
170    pub(crate) fn resolve_source(&self, path: &str) -> PathBuf {
171        let p = PathBuf::from(path);
172        if p.is_absolute() {
173            p
174        } else {
175            self.config.workspace_dir().join(path)
176        }
177    }
178
179    /// Read all .md files in the wiki directory.
180    pub(crate) fn read_wiki_files(&self) -> anyhow::Result<Vec<WikiFile>> {
181        let wiki_dir = &self.config.paths.wiki;
182        if !wiki_dir.exists() {
183            return Ok(vec![]);
184        }
185
186        let mut files = Vec::new();
187        for entry in WalkDir::new(wiki_dir)
188            .into_iter()
189            .filter_map(|e| e.ok())
190            .filter(|e| e.path().extension().is_some_and(|ext| ext == "md"))
191        {
192            let path = entry.path();
193            let content = std::fs::read_to_string(path)?;
194            let modified = entry
195                .metadata()
196                .ok()
197                .and_then(|m| m.modified().ok())
198                .map(|t| t.into());
199
200            files.push(WikiFile {
201                path: path.to_path_buf(),
202                content,
203                modified,
204            });
205        }
206
207        files.sort_by_key(|f| f.modified);
208        Ok(files)
209    }
210
211    /// Write a wiki entry to disk.
212    pub(crate) fn write_wiki_entry(&self, title: &str, content: &str) -> anyhow::Result<PathBuf> {
213        let safe_name = sanitize_filename(title);
214        let wiki_dir = self.config.wiki_dir()?;
215        let path = wiki_dir.join(format!("{}.md", safe_name));
216
217        // Add source metadata header
218        let with_header = format!(
219            "<!-- source: ingested -->\n<!-- generated: {} -->\n\n{}",
220            chrono::Utc::now().to_rfc3339(),
221            content
222        );
223
224        std::fs::write(&path, with_header)?;
225        Ok(path)
226    }
227}
228
229/// Sanitize a string for use as a filename.
230fn sanitize_filename(name: &str) -> String {
231    let s = name.trim();
232    let re = regex::Regex::new(r"[^\p{L}\p{N}\-_ ]").unwrap();
233    let s = re.replace_all(s, "_");
234    let re2 = regex::Regex::new(r"_+").unwrap();
235    let s = re2.replace_all(&s, "_");
236    s.split_whitespace().take(80).collect::<Vec<_>>().join("_")
237}
238
239/// A single wiki Markdown file.
240#[derive(Debug, Clone)]
241pub struct WikiFile {
242    pub path: PathBuf,
243    pub content: String,
244    pub modified: Option<chrono::DateTime<chrono::Utc>>,
245}
246
247/// Wiki statistics.
248#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
249pub struct Status {
250    pub wiki_dir: PathBuf,
251    pub file_count: u32,
252    pub total_bytes: u64,
253    pub last_modified: Option<chrono::DateTime<chrono::Utc>>,
254    #[serde(default)]
255    pub issues: Vec<String>,
256}
257
#[cfg(test)]
mod tests {
    use super::sanitize_filename;

    /// Table of `(input, expected)` pairs covering mixed-script titles,
    /// path separators, and redundant whitespace.
    #[test]
    fn test_sanitize_filename() {
        let cases = [
            ("Q1 摘要 2024", "Q1_摘要_2024"),
            ("Hello/World", "Hello_World"),
            ("  多个  空格  ", "多个_空格"),
        ];

        for (input, expected) in cases {
            assert_eq!(sanitize_filename(input), expected);
        }
    }
}
268}