Skip to main content

thoughts_tool/documents/
mod.rs

1//! Library-level document management for `thoughts_tool`.
2//!
3//! This module provides reusable functions for writing and listing documents,
4//! and is used by both the MCP layer and other crates that depend on `thoughts_tool`.
5
6use crate::error::Result as TResult;
7use crate::error::ThoughtsError;
8use crate::repo_identity::RepoIdentity;
9use crate::utils::validation::validate_simple_filename;
10use crate::workspace::ActiveWork;
11use crate::workspace::ensure_active_work;
12use atomicwrites::AtomicFile;
13use atomicwrites::OverwriteBehavior;
14use chrono::DateTime;
15use chrono::Utc;
16use percent_encoding::AsciiSet;
17use percent_encoding::CONTROLS;
18use percent_encoding::utf8_percent_encode;
19use schemars::JsonSchema;
20use serde::Deserialize;
21use serde::Serialize;
22use std::fs;
23use std::path::PathBuf;
24
/// Document type categories for thoughts workspace.
#[derive(Debug, Clone, Serialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum DocumentType {
    // NOTE(review): plain `//` comments are used on the variants on purpose. This enum derives
    // `JsonSchema`, and `///` docs on variants presumably end up in the generated schema —
    // confirm before converting these to doc comments.
    //
    // `Serialize` is derived with snake_case names; `Deserialize` is implemented by hand
    // elsewhere in this file and is more lenient (plural forms, any ASCII case).

    // Stored under `ActiveWork::research`.
    Research,
    // Stored under `ActiveWork::plans`.
    Plan,
    // Stored under `ActiveWork::artifacts`.
    Artifact,
    // Stored under `ActiveWork::logs`.
    Log,
}
34
35impl DocumentType {
36    /// Returns the path for this document type's directory within `ActiveWork`.
37    pub fn subdir<'a>(&self, aw: &'a ActiveWork) -> &'a PathBuf {
38        match self {
39            Self::Research => &aw.research,
40            Self::Plan => &aw.plans,
41            Self::Artifact => &aw.artifacts,
42            Self::Log => &aw.logs,
43        }
44    }
45
46    /// Returns the plural directory name (for physical directory paths).
47    /// Note: serde serialization uses singular forms ("plan", "artifact", "research", "log"),
48    /// while physical directories use plural forms ("plans", "artifacts", "research", "logs").
49    /// This matches conventional filesystem naming while keeping API values consistent.
50    pub fn subdir_name(&self) -> &'static str {
51        match self {
52            Self::Research => "research",
53            Self::Plan => "plans",
54            Self::Artifact => "artifacts",
55            Self::Log => "logs",
56        }
57    }
58
59    /// Returns the singular label for this document type (used in output/reporting).
60    pub fn singular_label(&self) -> &'static str {
61        match self {
62            Self::Research => "research",
63            Self::Plan => "plan",
64            Self::Artifact => "artifact",
65            Self::Log => "log",
66        }
67    }
68}
69
70// Custom deserializer: accept singular/plural in a case-insensitive manner
71impl<'de> serde::Deserialize<'de> for DocumentType {
72    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
73    where
74        D: serde::Deserializer<'de>,
75    {
76        let s = String::deserialize(deserializer)?;
77        let norm = s.trim().to_ascii_lowercase();
78        match norm.as_str() {
79            "research" => Ok(Self::Research),
80            "plan" | "plans" => Ok(Self::Plan),
81            "artifact" | "artifacts" => Ok(Self::Artifact),
82            "log" | "logs" => Ok(Self::Log), // accepts both for backward compat
83            other => Err(serde::de::Error::custom(format!(
84                "invalid doc_type '{other}'; expected research|plan(s)|artifact(s)|log(s)"
85            ))),
86        }
87    }
88}
89
/// Result of successfully writing a document.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct WriteDocumentOk {
    // NOTE(review): field notes use `//` rather than `///` because this struct derives
    // `JsonSchema`; doc comments presumably flow into the generated schema — confirm before
    // changing them.

    // Display path of the written file; `write_document` builds it as
    // `./thoughts/{dir_name}/{type_dir}/{filename}`.
    pub path: String,
    // Length in bytes of the content that was written (`content.len()` in `write_document`).
    pub bytes_written: u64,
    /// GitHub URL for the document (available after sync).
    /// None if the remote is not GitHub-hosted or URL couldn't be computed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub github_url: Option<String>,
}
100
/// Metadata about a single document file.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct DocumentInfo {
    // NOTE(review): `//` comments are used deliberately; this struct derives `JsonSchema` and
    // `///` text presumably becomes part of the schema — confirm before converting.

    // Display path; `list_documents` builds it as `{base}/{type_dir}/{file_name}`.
    pub path: String,
    // Singular label of the document type (see `DocumentType::singular_label`).
    pub doc_type: String,
    // File size in bytes, taken from the file's metadata.
    pub size: u64,
    // Last-modified time formatted as RFC 3339 (UTC) by `list_documents`.
    pub modified: String,
}
109
/// Result of listing documents in the active work directory.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ActiveDocuments {
    // NOTE(review): `//` comments are used deliberately; this struct derives `JsonSchema` and
    // `///` text presumably becomes part of the schema — confirm before converting.

    // Display base path; `list_documents` sets it to `./thoughts/{dir_name}`.
    pub base: String,
    // One entry per regular file found in the scanned type directories.
    pub files: Vec<DocumentInfo>,
}
116
117/// Compute GitHub blob URL if the remote is GitHub-hosted.
118///
119/// Returns None if:
120/// - No remote URL is available
121/// - No git ref is available
122/// - The remote is not GitHub-hosted
123/// - The URL couldn't be parsed
124const GITHUB_PATH_SEGMENT_ENCODE_SET: &AsciiSet = &CONTROLS
125    .add(b' ')
126    .add(b'!')
127    .add(b'"')
128    .add(b'#')
129    .add(b'$')
130    .add(b'%')
131    .add(b'&')
132    .add(b'\'')
133    .add(b'(')
134    .add(b')')
135    .add(b'*')
136    .add(b'+')
137    .add(b',')
138    .add(b':')
139    .add(b';')
140    .add(b'<')
141    .add(b'=')
142    .add(b'>')
143    .add(b'?')
144    .add(b'@')
145    .add(b'[')
146    .add(b'\\')
147    .add(b']')
148    .add(b'^')
149    .add(b'`')
150    .add(b'{')
151    .add(b'|')
152    .add(b'}');
153
154fn encode_path_segment(value: &str) -> String {
155    value
156        .split('/')
157        .map(|segment| utf8_percent_encode(segment, GITHUB_PATH_SEGMENT_ENCODE_SET).to_string())
158        .collect::<Vec<_>>()
159        .join("/")
160}
161
162fn compute_github_url(
163    remote_url: Option<&str>,
164    repo_subpath: Option<&str>,
165    git_ref: Option<&str>,
166    dir_name: &str,
167    doc_type: &DocumentType,
168    filename: &str,
169) -> Option<String> {
170    let remote = remote_url?;
171    let git_ref = git_ref?;
172    let identity = RepoIdentity::parse(remote).ok()?;
173
174    // Only generate URLs for GitHub
175    if identity.host != "github.com" {
176        return None;
177    }
178
179    // Guard against single-segment remotes that produce empty org_path
180    if identity.org_path.is_empty() {
181        return None;
182    }
183
184    // Build the path within the repo
185    // Structure: {subpath}/{dir_name}/{doc_type_dir}/{filename}
186    let mut path_parts = Vec::new();
187    if let Some(subpath) = repo_subpath {
188        let subpath = subpath.trim().trim_matches('/');
189        if !subpath.is_empty() {
190            path_parts.push(encode_path_segment(subpath));
191        }
192    }
193    path_parts.push(encode_path_segment(dir_name));
194    path_parts.push(doc_type.subdir_name().to_string());
195    path_parts.push(encode_path_segment(filename));
196
197    let path_in_repo = path_parts.join("/");
198
199    Some(format!(
200        "https://github.com/{}/{}/blob/{}/{}",
201        encode_path_segment(&identity.org_path),
202        encode_path_segment(&identity.repo),
203        encode_path_segment(git_ref),
204        path_in_repo
205    ))
206}
207
208/// Write a document to the active work directory.
209///
210/// # Arguments
211/// * `doc_type` - The type of document (research, plan, artifact, log)
212/// * `filename` - The filename (validated for safety)
213/// * `content` - The content to write
214///
215/// # Returns
216/// A `WriteDocumentOk` with the path, bytes written, and optional GitHub URL on success.
217pub fn write_document(
218    doc_type: &DocumentType,
219    filename: &str,
220    content: &str,
221) -> TResult<WriteDocumentOk> {
222    validate_simple_filename(filename)?;
223    let aw = ensure_active_work()?;
224    let dir = doc_type.subdir(&aw);
225    let target = dir.join(filename);
226    let bytes_written = content.len() as u64;
227
228    AtomicFile::new(&target, OverwriteBehavior::AllowOverwrite)
229        .write(|f| std::io::Write::write_all(f, content.as_bytes()))
230        .map_err(|e| ThoughtsError::Io(std::io::Error::other(e)))?;
231
232    let github_url = compute_github_url(
233        aw.remote_url.as_deref(),
234        aw.repo_subpath.as_deref(),
235        aw.thoughts_git_ref.as_deref(),
236        &aw.dir_name,
237        doc_type,
238        filename,
239    );
240
241    Ok(WriteDocumentOk {
242        path: format!(
243            "./thoughts/{}/{}/{}",
244            aw.dir_name,
245            doc_type.subdir_name(),
246            filename
247        ),
248        bytes_written,
249        github_url,
250    })
251}
252
253/// List documents in the active work directory.
254///
255/// # Arguments
256/// * `subdir` - Optional filter for a specific document type. If None, lists research, plans, artifacts
257///   (but NOT logs by default - logs must be explicitly requested).
258///
259/// # Returns
260/// An `ActiveDocuments` with the base path and list of files.
261pub fn list_documents(subdir: Option<&DocumentType>) -> TResult<ActiveDocuments> {
262    let aw = ensure_active_work()?;
263    let base = format!("./thoughts/{}", aw.dir_name);
264
265    // Determine which subdirs to scan
266    // Tuple: (singular_label for doc_type output, plural_dirname for paths, PathBuf)
267    let sets: Vec<(&str, &str, PathBuf)> = match subdir {
268        Some(d) => {
269            vec![(d.singular_label(), d.subdir_name(), d.subdir(&aw).clone())]
270        }
271        None => vec![
272            ("research", "research", aw.research.clone()),
273            ("plan", "plans", aw.plans.clone()),
274            ("artifact", "artifacts", aw.artifacts),
275            // Do NOT include logs by default - must be explicitly requested
276        ],
277    };
278
279    let mut files = Vec::new();
280    for (singular_label, dirname, dir) in sets {
281        if !dir.exists() {
282            continue;
283        }
284        for entry in fs::read_dir(&dir)? {
285            let entry = entry?;
286            let meta = entry.metadata()?;
287            if meta.is_file() {
288                let modified: DateTime<Utc> = meta
289                    .modified()
290                    .map_or_else(|_| Utc::now(), std::convert::Into::into);
291                let file_name = entry.file_name().to_string_lossy().to_string();
292                files.push(DocumentInfo {
293                    path: format!("{base}/{dirname}/{file_name}"),
294                    doc_type: singular_label.to_string(),
295                    size: meta.len(),
296                    modified: modified.to_rfc3339(),
297                });
298            }
299        }
300    }
301
302    Ok(ActiveDocuments { base, files })
303}
304
305/// Get the path to the logs directory in the active work, ensuring it exists.
306///
307/// This is a convenience function for other crates that need to write log files
308/// directly (e.g., `agentic_logging`).
309///
310/// # Returns
311/// The absolute path to the logs directory.
312pub fn active_logs_dir() -> TResult<PathBuf> {
313    let aw = ensure_active_work()?;
314    if !aw.logs.exists() {
315        std::fs::create_dir_all(&aw.logs)?;
316    }
317    Ok(aw.logs)
318}
319
#[cfg(test)]
mod tests {
    use super::*;

    /// SSH-style GitHub remote shared by most URL tests.
    const SSH_REMOTE: &str = "git@github.com:org/repo.git";

    /// Deserializes a JSON string literal into a `DocumentType`, panicking on failure.
    fn parse(json: &str) -> DocumentType {
        serde_json::from_str(json).unwrap()
    }

    /// Builds a URL for `remote` with no repo subpath and the git ref fixed to `main`.
    fn url_on_main(
        remote: &str,
        dir_name: &str,
        doc_type: &DocumentType,
        filename: &str,
    ) -> Option<String> {
        compute_github_url(
            Some(remote),
            None,
            Some("main"),
            dir_name,
            doc_type,
            filename,
        )
    }

    #[test]
    fn test_document_type_deserialize_singular() {
        assert!(matches!(parse("\"research\""), DocumentType::Research));
        assert!(matches!(parse("\"plan\""), DocumentType::Plan));
        assert!(matches!(parse("\"artifact\""), DocumentType::Artifact));
        assert!(matches!(parse("\"log\""), DocumentType::Log));
    }

    #[test]
    fn test_document_type_deserialize_plural() {
        assert!(matches!(parse("\"plans\""), DocumentType::Plan));
        assert!(matches!(parse("\"artifacts\""), DocumentType::Artifact));
        assert!(matches!(parse("\"logs\""), DocumentType::Log));
    }

    #[test]
    fn test_document_type_deserialize_case_insensitive() {
        assert!(matches!(parse("\"PLAN\""), DocumentType::Plan));
        assert!(matches!(parse("\"Research\""), DocumentType::Research));
        assert!(matches!(parse("\"LOG\""), DocumentType::Log));
        assert!(matches!(parse("\"LOGS\""), DocumentType::Log));
    }

    #[test]
    fn test_document_type_deserialize_invalid() {
        let err = serde_json::from_str::<DocumentType>("\"invalid\"")
            .expect_err("unknown doc_type must be rejected")
            .to_string();
        assert!(err.contains("invalid doc_type"));
    }

    #[test]
    fn test_document_type_serialize() {
        let cases = [
            (DocumentType::Plan, "\"plan\""),
            (DocumentType::Artifact, "\"artifact\""),
            (DocumentType::Log, "\"log\""),
        ];
        for (doc_type, expected) in cases {
            assert_eq!(serde_json::to_string(&doc_type).unwrap(), expected);
        }
    }

    #[test]
    fn test_subdir_names() {
        let cases = [
            (DocumentType::Research, "research"),
            (DocumentType::Plan, "plans"),
            (DocumentType::Artifact, "artifacts"),
            (DocumentType::Log, "logs"),
        ];
        for (doc_type, expected) in cases {
            assert_eq!(doc_type.subdir_name(), expected);
        }
    }

    #[test]
    fn test_singular_labels() {
        let cases = [
            (DocumentType::Research, "research"),
            (DocumentType::Plan, "plan"),
            (DocumentType::Artifact, "artifact"),
            (DocumentType::Log, "log"),
        ];
        for (doc_type, expected) in cases {
            assert_eq!(doc_type.singular_label(), expected);
        }
    }

    #[test]
    fn test_compute_github_url_ssh() {
        let url = url_on_main(SSH_REMOTE, "main", &DocumentType::Research, "doc.md");
        assert_eq!(
            url.as_deref(),
            Some("https://github.com/org/repo/blob/main/main/research/doc.md")
        );
    }

    #[test]
    fn test_compute_github_url_https() {
        let url = compute_github_url(
            Some("https://github.com/org/repo.git"),
            Some("docs/thoughts"),
            Some("main"),
            "feature-branch",
            &DocumentType::Plan,
            "plan.md",
        );
        assert_eq!(
            url.as_deref(),
            Some("https://github.com/org/repo/blob/main/docs/thoughts/feature-branch/plans/plan.md")
        );
    }

    #[test]
    fn test_compute_github_url_non_github() {
        let url = url_on_main(
            "git@gitlab.com:org/repo.git",
            "main",
            &DocumentType::Research,
            "doc.md",
        );
        assert_eq!(url, None);
    }

    #[test]
    fn test_compute_github_url_none_remote() {
        let url = compute_github_url(
            None,
            None,
            Some("main"),
            "main",
            &DocumentType::Research,
            "doc.md",
        );
        assert_eq!(url, None);
    }

    #[test]
    fn test_compute_github_url_no_subpath() {
        let url = url_on_main(
            "git@github.com:General-Wisdom/thoughts.git",
            "allison-feature",
            &DocumentType::Artifact,
            "test.md",
        );
        assert_eq!(
            url.as_deref(),
            Some("https://github.com/General-Wisdom/thoughts/blob/main/allison-feature/artifacts/test.md")
        );
    }

    #[test]
    fn test_compute_github_url_empty_org_path() {
        // Single-segment remotes produce empty org_path; should return None
        // to avoid malformed URLs like https://github.com//repo/...
        let url = url_on_main(
            "git@github.com:repo.git",
            "main",
            &DocumentType::Research,
            "doc.md",
        );
        assert_eq!(url, None);
    }

    #[test]
    fn test_compute_github_url_slash_branch() {
        let url = url_on_main(
            SSH_REMOTE,
            "feature/login",
            &DocumentType::Research,
            "notes.md",
        );
        assert_eq!(
            url.as_deref(),
            Some("https://github.com/org/repo/blob/main/feature/login/research/notes.md")
        );
    }

    #[test]
    fn test_compute_github_url_special_chars() {
        let url = url_on_main(SSH_REMOTE, "feat#1%", &DocumentType::Plan, "plan.md");
        assert_eq!(
            url.as_deref(),
            Some("https://github.com/org/repo/blob/main/feat%231%25/plans/plan.md")
        );
    }

    #[test]
    fn test_compute_github_url_detached_head() {
        // No git ref available (e.g. detached HEAD): no URL can be built.
        let url = compute_github_url(
            Some(SSH_REMOTE),
            None,
            None,
            "some-branch",
            &DocumentType::Research,
            "doc.md",
        );
        assert_eq!(url, None);
    }

    #[test]
    fn test_compute_github_url_space_in_branch() {
        let url = url_on_main(SSH_REMOTE, "my branch", &DocumentType::Artifact, "out.md");
        assert_eq!(
            url.as_deref(),
            Some("https://github.com/org/repo/blob/main/my%20branch/artifacts/out.md")
        );
    }
}
559}