Skip to main content

thoughts_tool/documents/
mod.rs

1//! Library-level document management for thoughts_tool.
2//!
3//! This module provides reusable functions for writing and listing documents,
4//! and is used by both the MCP layer and other crates that depend on thoughts_tool.
5
6use crate::error::Result as TResult;
7use crate::error::ThoughtsError;
8use crate::repo_identity::RepoIdentity;
9use crate::utils::validation::validate_simple_filename;
10use crate::workspace::ActiveWork;
11use crate::workspace::ensure_active_work;
12use atomicwrites::AtomicFile;
13use atomicwrites::OverwriteBehavior;
14use chrono::DateTime;
15use chrono::Utc;
16use percent_encoding::AsciiSet;
17use percent_encoding::CONTROLS;
18use percent_encoding::utf8_percent_encode;
19use schemars::JsonSchema;
20use serde::Deserialize;
21use serde::Serialize;
22use std::fs;
23use std::path::PathBuf;
24
25/// Document type categories for thoughts workspace.
26#[derive(Debug, Clone, Serialize, JsonSchema)]
27#[serde(rename_all = "snake_case")]
28pub enum DocumentType {
29    Research,
30    Plan,
31    Artifact,
32    Log,
33}
34
35impl DocumentType {
36    /// Returns the path for this document type's directory within ActiveWork.
37    pub fn subdir<'a>(&self, aw: &'a ActiveWork) -> &'a PathBuf {
38        match self {
39            DocumentType::Research => &aw.research,
40            DocumentType::Plan => &aw.plans,
41            DocumentType::Artifact => &aw.artifacts,
42            DocumentType::Log => &aw.logs,
43        }
44    }
45
46    /// Returns the plural directory name (for physical directory paths).
47    /// Note: serde serialization uses singular forms ("plan", "artifact", "research", "log"),
48    /// while physical directories use plural forms ("plans", "artifacts", "research", "logs").
49    /// This matches conventional filesystem naming while keeping API values consistent.
50    pub fn subdir_name(&self) -> &'static str {
51        match self {
52            DocumentType::Research => "research",
53            DocumentType::Plan => "plans",
54            DocumentType::Artifact => "artifacts",
55            DocumentType::Log => "logs",
56        }
57    }
58
59    /// Returns the singular label for this document type (used in output/reporting).
60    pub fn singular_label(&self) -> &'static str {
61        match self {
62            DocumentType::Research => "research",
63            DocumentType::Plan => "plan",
64            DocumentType::Artifact => "artifact",
65            DocumentType::Log => "log",
66        }
67    }
68}
69
70// Custom deserializer: accept singular/plural in a case-insensitive manner
71impl<'de> serde::Deserialize<'de> for DocumentType {
72    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
73    where
74        D: serde::Deserializer<'de>,
75    {
76        let s = String::deserialize(deserializer)?;
77        let norm = s.trim().to_ascii_lowercase();
78        match norm.as_str() {
79            "research" => Ok(DocumentType::Research),
80            "plan" | "plans" => Ok(DocumentType::Plan),
81            "artifact" | "artifacts" => Ok(DocumentType::Artifact),
82            "log" | "logs" => Ok(DocumentType::Log), // accepts both for backward compat
83            other => Err(serde::de::Error::custom(format!(
84                "invalid doc_type '{}'; expected research|plan(s)|artifact(s)|log(s)",
85                other
86            ))),
87        }
88    }
89}
90
/// Result of successfully writing a document.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct WriteDocumentOk {
    /// Display path of the written file, formatted by `write_document` as
    /// `./thoughts/{dir_name}/{type_dir}/{filename}`.
    pub path: String,
    /// Length of the written content in bytes.
    pub bytes_written: u64,
    /// GitHub URL for the document (available after sync).
    /// None if the remote is not GitHub-hosted or URL couldn't be computed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub github_url: Option<String>,
}
101
/// Metadata about a single document file.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct DocumentInfo {
    /// Display path of the file, formatted by `list_documents` as
    /// `./thoughts/{dir_name}/{type_dir}/{file_name}`.
    pub path: String,
    /// Singular document type label ("research", "plan", "artifact" or "log").
    pub doc_type: String,
    /// File size in bytes, as reported by the filesystem metadata.
    pub size: u64,
    /// Last-modified time as an RFC 3339 UTC timestamp.
    /// `list_documents` substitutes the current time when the modification time is unavailable.
    pub modified: String,
}
110
/// Result of listing documents in the active work directory.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ActiveDocuments {
    /// Display path of the active work directory, formatted as `./thoughts/{dir_name}`.
    pub base: String,
    /// One entry per regular file found in the scanned document directories.
    pub files: Vec<DocumentInfo>,
}
117
118/// Compute GitHub blob URL if the remote is GitHub-hosted.
119///
120/// Returns None if:
121/// - No remote URL is available
122/// - No git ref is available
123/// - The remote is not GitHub-hosted
124/// - The URL couldn't be parsed
125const GITHUB_PATH_SEGMENT_ENCODE_SET: &AsciiSet = &CONTROLS
126    .add(b' ')
127    .add(b'!')
128    .add(b'"')
129    .add(b'#')
130    .add(b'$')
131    .add(b'%')
132    .add(b'&')
133    .add(b'\'')
134    .add(b'(')
135    .add(b')')
136    .add(b'*')
137    .add(b'+')
138    .add(b',')
139    .add(b':')
140    .add(b';')
141    .add(b'<')
142    .add(b'=')
143    .add(b'>')
144    .add(b'?')
145    .add(b'@')
146    .add(b'[')
147    .add(b'\\')
148    .add(b']')
149    .add(b'^')
150    .add(b'`')
151    .add(b'{')
152    .add(b'|')
153    .add(b'}');
154
155fn encode_path_segment(value: &str) -> String {
156    value
157        .split('/')
158        .map(|segment| utf8_percent_encode(segment, GITHUB_PATH_SEGMENT_ENCODE_SET).to_string())
159        .collect::<Vec<_>>()
160        .join("/")
161}
162
163fn compute_github_url(
164    remote_url: Option<&str>,
165    repo_subpath: Option<&str>,
166    git_ref: Option<&str>,
167    dir_name: &str,
168    doc_type: &DocumentType,
169    filename: &str,
170) -> Option<String> {
171    let remote = remote_url?;
172    let git_ref = git_ref?;
173    let identity = RepoIdentity::parse(remote).ok()?;
174
175    // Only generate URLs for GitHub
176    if identity.host != "github.com" {
177        return None;
178    }
179
180    // Guard against single-segment remotes that produce empty org_path
181    if identity.org_path.is_empty() {
182        return None;
183    }
184
185    // Build the path within the repo
186    // Structure: {subpath}/{dir_name}/{doc_type_dir}/{filename}
187    let mut path_parts = Vec::new();
188    if let Some(subpath) = repo_subpath {
189        let subpath = subpath.trim().trim_matches('/');
190        if !subpath.is_empty() {
191            path_parts.push(encode_path_segment(subpath));
192        }
193    }
194    path_parts.push(encode_path_segment(dir_name));
195    path_parts.push(doc_type.subdir_name().to_string());
196    path_parts.push(encode_path_segment(filename));
197
198    let path_in_repo = path_parts.join("/");
199
200    Some(format!(
201        "https://github.com/{}/{}/blob/{}/{}",
202        encode_path_segment(&identity.org_path),
203        encode_path_segment(&identity.repo),
204        encode_path_segment(git_ref),
205        path_in_repo
206    ))
207}
208
209/// Write a document to the active work directory.
210///
211/// # Arguments
212/// * `doc_type` - The type of document (research, plan, artifact, log)
213/// * `filename` - The filename (validated for safety)
214/// * `content` - The content to write
215///
216/// # Returns
217/// A `WriteDocumentOk` with the path, bytes written, and optional GitHub URL on success.
218pub fn write_document(
219    doc_type: DocumentType,
220    filename: &str,
221    content: &str,
222) -> TResult<WriteDocumentOk> {
223    validate_simple_filename(filename)?;
224    let aw = ensure_active_work()?;
225    let dir = doc_type.subdir(&aw);
226    let target = dir.join(filename);
227    let bytes_written = content.len() as u64;
228
229    AtomicFile::new(&target, OverwriteBehavior::AllowOverwrite)
230        .write(|f| std::io::Write::write_all(f, content.as_bytes()))
231        .map_err(|e| ThoughtsError::Io(std::io::Error::other(e)))?;
232
233    let github_url = compute_github_url(
234        aw.remote_url.as_deref(),
235        aw.repo_subpath.as_deref(),
236        aw.thoughts_git_ref.as_deref(),
237        &aw.dir_name,
238        &doc_type,
239        filename,
240    );
241
242    Ok(WriteDocumentOk {
243        path: format!(
244            "./thoughts/{}/{}/{}",
245            aw.dir_name,
246            doc_type.subdir_name(),
247            filename
248        ),
249        bytes_written,
250        github_url,
251    })
252}
253
254/// List documents in the active work directory.
255///
256/// # Arguments
257/// * `subdir` - Optional filter for a specific document type. If None, lists research, plans, artifacts
258///   (but NOT logs by default - logs must be explicitly requested).
259///
260/// # Returns
261/// An `ActiveDocuments` with the base path and list of files.
262pub fn list_documents(subdir: Option<DocumentType>) -> TResult<ActiveDocuments> {
263    let aw = ensure_active_work()?;
264    let base = format!("./thoughts/{}", aw.dir_name);
265
266    // Determine which subdirs to scan
267    // Tuple: (singular_label for doc_type output, plural_dirname for paths, PathBuf)
268    let sets: Vec<(&str, &str, PathBuf)> = match subdir {
269        Some(ref d) => {
270            vec![(d.singular_label(), d.subdir_name(), d.subdir(&aw).clone())]
271        }
272        None => vec![
273            ("research", "research", aw.research.clone()),
274            ("plan", "plans", aw.plans.clone()),
275            ("artifact", "artifacts", aw.artifacts.clone()),
276            // Do NOT include logs by default - must be explicitly requested
277        ],
278    };
279
280    let mut files = Vec::new();
281    for (singular_label, dirname, dir) in sets {
282        if !dir.exists() {
283            continue;
284        }
285        for entry in fs::read_dir(&dir)? {
286            let entry = entry?;
287            let meta = entry.metadata()?;
288            if meta.is_file() {
289                let modified: DateTime<Utc> = meta
290                    .modified()
291                    .map(|t| t.into())
292                    .unwrap_or_else(|_| Utc::now());
293                let file_name = entry.file_name().to_string_lossy().to_string();
294                files.push(DocumentInfo {
295                    path: format!("{}/{}/{}", base, dirname, file_name),
296                    doc_type: singular_label.to_string(),
297                    size: meta.len(),
298                    modified: modified.to_rfc3339(),
299                });
300            }
301        }
302    }
303
304    Ok(ActiveDocuments { base, files })
305}
306
307/// Get the path to the logs directory in the active work, ensuring it exists.
308///
309/// This is a convenience function for other crates that need to write log files
310/// directly (e.g., agentic_logging).
311///
312/// # Returns
313/// The absolute path to the logs directory.
314pub fn active_logs_dir() -> TResult<PathBuf> {
315    let aw = ensure_active_work()?;
316    if !aw.logs.exists() {
317        std::fs::create_dir_all(&aw.logs)?;
318    }
319    Ok(aw.logs.clone())
320}
321
#[cfg(test)]
mod tests {
    use super::*;

    /// SSH-style GitHub remote shared by most URL tests.
    const SSH_REMOTE: &str = "git@github.com:org/repo.git";

    /// Deserializes `raw` as a JSON string value into a `DocumentType`.
    fn parse_doc_type(raw: &str) -> Result<DocumentType, serde_json::Error> {
        serde_json::from_str(&format!("\"{}\"", raw))
    }

    #[test]
    fn test_document_type_deserialize_singular() {
        assert!(matches!(parse_doc_type("research").unwrap(), DocumentType::Research));
        assert!(matches!(parse_doc_type("plan").unwrap(), DocumentType::Plan));
        assert!(matches!(parse_doc_type("artifact").unwrap(), DocumentType::Artifact));
        assert!(matches!(parse_doc_type("log").unwrap(), DocumentType::Log));
    }

    #[test]
    fn test_document_type_deserialize_plural() {
        assert!(matches!(parse_doc_type("plans").unwrap(), DocumentType::Plan));
        assert!(matches!(parse_doc_type("artifacts").unwrap(), DocumentType::Artifact));
        assert!(matches!(parse_doc_type("logs").unwrap(), DocumentType::Log));
    }

    #[test]
    fn test_document_type_deserialize_case_insensitive() {
        assert!(matches!(parse_doc_type("PLAN").unwrap(), DocumentType::Plan));
        assert!(matches!(parse_doc_type("Research").unwrap(), DocumentType::Research));
        assert!(matches!(parse_doc_type("LOG").unwrap(), DocumentType::Log));
        assert!(matches!(parse_doc_type("LOGS").unwrap(), DocumentType::Log));
    }

    #[test]
    fn test_document_type_deserialize_invalid() {
        let outcome = parse_doc_type("invalid");
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("invalid doc_type"));
    }

    #[test]
    fn test_document_type_serialize() {
        let cases = [
            (DocumentType::Plan, "\"plan\""),
            (DocumentType::Artifact, "\"artifact\""),
            (DocumentType::Log, "\"log\""),
        ];
        for (doc_type, expected) in cases {
            assert_eq!(serde_json::to_string(&doc_type).unwrap(), expected);
        }
    }

    #[test]
    fn test_subdir_names() {
        let cases = [
            (DocumentType::Research, "research"),
            (DocumentType::Plan, "plans"),
            (DocumentType::Artifact, "artifacts"),
            (DocumentType::Log, "logs"),
        ];
        for (doc_type, expected) in cases {
            assert_eq!(doc_type.subdir_name(), expected);
        }
    }

    #[test]
    fn test_singular_labels() {
        let cases = [
            (DocumentType::Research, "research"),
            (DocumentType::Plan, "plan"),
            (DocumentType::Artifact, "artifact"),
            (DocumentType::Log, "log"),
        ];
        for (doc_type, expected) in cases {
            assert_eq!(doc_type.singular_label(), expected);
        }
    }

    #[test]
    fn test_compute_github_url_ssh() {
        let url = compute_github_url(
            Some(SSH_REMOTE),
            None,
            Some("main"),
            "main",
            &DocumentType::Research,
            "doc.md",
        );
        assert_eq!(
            url.as_deref(),
            Some("https://github.com/org/repo/blob/main/main/research/doc.md")
        );
    }

    #[test]
    fn test_compute_github_url_https() {
        let url = compute_github_url(
            Some("https://github.com/org/repo.git"),
            Some("docs/thoughts"),
            Some("main"),
            "feature-branch",
            &DocumentType::Plan,
            "plan.md",
        );
        assert_eq!(
            url.as_deref(),
            Some("https://github.com/org/repo/blob/main/docs/thoughts/feature-branch/plans/plan.md")
        );
    }

    #[test]
    fn test_compute_github_url_non_github() {
        let url = compute_github_url(
            Some("git@gitlab.com:org/repo.git"),
            None,
            Some("main"),
            "main",
            &DocumentType::Research,
            "doc.md",
        );
        assert!(url.is_none());
    }

    #[test]
    fn test_compute_github_url_none_remote() {
        let url = compute_github_url(
            None,
            None,
            Some("main"),
            "main",
            &DocumentType::Research,
            "doc.md",
        );
        assert!(url.is_none());
    }

    #[test]
    fn test_compute_github_url_no_subpath() {
        let url = compute_github_url(
            Some("git@github.com:General-Wisdom/thoughts.git"),
            None,
            Some("main"),
            "allison-feature",
            &DocumentType::Artifact,
            "test.md",
        );
        assert_eq!(
            url.as_deref(),
            Some("https://github.com/General-Wisdom/thoughts/blob/main/allison-feature/artifacts/test.md")
        );
    }

    #[test]
    fn test_compute_github_url_empty_org_path() {
        // A single-segment remote yields an empty org_path; the function must return None
        // rather than a malformed URL such as https://github.com//repo/...
        let url = compute_github_url(
            Some("git@github.com:repo.git"),
            None,
            Some("main"),
            "main",
            &DocumentType::Research,
            "doc.md",
        );
        assert!(url.is_none());
    }

    #[test]
    fn test_compute_github_url_slash_branch() {
        let url = compute_github_url(
            Some(SSH_REMOTE),
            None,
            Some("main"),
            "feature/login",
            &DocumentType::Research,
            "notes.md",
        );
        assert_eq!(
            url.as_deref(),
            Some("https://github.com/org/repo/blob/main/feature/login/research/notes.md")
        );
    }

    #[test]
    fn test_compute_github_url_special_chars() {
        let url = compute_github_url(
            Some(SSH_REMOTE),
            None,
            Some("main"),
            "feat#1%",
            &DocumentType::Plan,
            "plan.md",
        );
        assert_eq!(
            url.as_deref(),
            Some("https://github.com/org/repo/blob/main/feat%231%25/plans/plan.md")
        );
    }

    #[test]
    fn test_compute_github_url_detached_head() {
        let url = compute_github_url(
            Some(SSH_REMOTE),
            None,
            None,
            "some-branch",
            &DocumentType::Research,
            "doc.md",
        );
        assert!(url.is_none());
    }

    #[test]
    fn test_compute_github_url_space_in_branch() {
        let url = compute_github_url(
            Some(SSH_REMOTE),
            None,
            Some("main"),
            "my branch",
            &DocumentType::Artifact,
            "out.md",
        );
        assert_eq!(
            url.as_deref(),
            Some("https://github.com/org/repo/blob/main/my%20branch/artifacts/out.md")
        );
    }
}
561}