gobby-wiki 0.2.0

Gobby wiki CLI shell
use std::path::Path;

use crate::WikiError;
use crate::ingest::{
    IngestResult, markdown_title, single_line, text_from_utf8_lossy, write_raw_then_index,
};
use crate::sources::{CompileStatus, IngestionMethod, SourceDraft, SourceKind, SourceManifest};
use crate::store::WikiIndexStore;

/// Upper bound on the number of delimiter characters in a generated code fence.
///
/// `bounded_max_run` stops counting once a delimiter run reaches
/// `MAX_CODE_FENCE_LEN - 1`, and `markdown_code_fence` emits that run length
/// plus one, so a fence is never longer than this constant.
const MAX_CODE_FENCE_LEN: usize = 64;

/// One file captured from a Git repository: its path plus its raw bytes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitFileSnapshot {
    /// Path of the file. It is written into the `file_path:` line of the
    /// rendered Markdown and its extension selects the code-fence info string.
    /// NOTE(review): presumably repository-relative (the tests use
    /// `README.md` and `src/lib.rs`) — confirm with callers.
    pub path: String,
    /// Raw file contents. They are hashed into the source content as-is and
    /// passed through `text_from_utf8_lossy` before being rendered.
    pub bytes: Vec<u8>,
}

/// A selection of files taken from a Git repository at a specific commit.
///
/// This is the input of `ingest_repository`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitRepositorySnapshot {
    /// Remote URL of the repository. Used for the source location
    /// (`git+<remote_url>@<commit_sha>`), the title, the citation and the
    /// `git_remote` front-matter field.
    pub remote_url: String,
    /// Commit identifier the files were read at; recorded as `git_commit`.
    pub commit_sha: String,
    /// Timestamp of the fetch, stored verbatim on the source draft and in the
    /// front matter.
    /// NOTE(review): the tests use an RFC 3339 UTC string — confirm whether
    /// any format is enforced elsewhere.
    pub fetched_at: String,
    /// Files included in the snapshot. `ingest_repository` rejects an empty
    /// list.
    pub files: Vec<GitFileSnapshot>,
}

pub fn ingest_repository(
    vault_root: &Path,
    store: &mut impl WikiIndexStore,
    snapshot: GitRepositorySnapshot,
) -> Result<IngestResult, WikiError> {
    if snapshot.files.is_empty() {
        return Err(WikiError::InvalidInput {
            field: "files",
            message: "git repository ingest requires at least one selected file".to_string(),
        });
    }

    let location = format!("git+{}@{}", snapshot.remote_url, snapshot.commit_sha);
    let title = markdown_title(&snapshot.remote_url);
    let draft = SourceDraft {
        location,
        kind: SourceKind::GitRepository,
        fetched_at: snapshot.fetched_at.clone(),
        content: snapshot_content_bytes(&snapshot),
        title: Some(title.clone()),
        citation: Some(format!("{} @ {}", snapshot.remote_url, snapshot.commit_sha)),
        license: None,
        ingestion_method: IngestionMethod::Manual,
        compile_status: CompileStatus::Pending,
    };
    let record = SourceManifest::register(vault_root, draft)?;
    let markdown = render_git_markdown(&snapshot, &title, &record.content_hash);
    write_raw_then_index(vault_root, store, record, &markdown, None)
}

/// Builds the byte payload that represents a snapshot's content.
///
/// Layout: the remote URL and the commit SHA, each followed by a newline,
/// then for every file a `\n-- <path> --\n` header followed by the file's
/// bytes. A newline is appended after any file that does not already end
/// with one (including empty files).
fn snapshot_content_bytes(snapshot: &GitRepositorySnapshot) -> Vec<u8> {
    let mut payload = Vec::new();

    for header_line in [&snapshot.remote_url, &snapshot.commit_sha] {
        payload.extend_from_slice(header_line.as_bytes());
        payload.push(b'\n');
    }

    for file in &snapshot.files {
        let parts: [&[u8]; 4] = [b"\n-- ", file.path.as_bytes(), b" --\n", &file.bytes];
        for part in parts {
            payload.extend_from_slice(part);
        }
        // Keep every file section newline-terminated.
        if file.bytes.last() != Some(&b'\n') {
            payload.push(b'\n');
        }
    }

    payload
}

/// Renders a snapshot as a Markdown document.
///
/// The document starts with a YAML front-matter block (`source_kind`,
/// `git_remote`, `git_commit`, `fetched_at`, `source_hash`), followed by a
/// `# <title>` heading and one `## <file>` section per file. Each section
/// carries a `file_path:` line and the file text inside a code fence whose
/// delimiter and length are chosen by `markdown_code_fence`.
fn render_git_markdown(snapshot: &GitRepositorySnapshot, title: &str, source_hash: &str) -> String {
    let frontmatter = [
        ("source_kind", "git_repository".to_string()),
        ("git_remote", snapshot.remote_url.clone()),
        ("git_commit", snapshot.commit_sha.clone()),
        ("fetched_at", snapshot.fetched_at.clone()),
        ("source_hash", source_hash.to_string()),
    ];
    let mut document = git_markdown_metadata(&frontmatter);
    document.push_str(&format!("# {title}\n\n"));

    for file in &snapshot.files {
        let body = text_from_utf8_lossy(&file.bytes);
        let fence = markdown_code_fence(&body);

        // Section heading, path line and opening fence with its info string.
        document.push_str(&format!(
            "## {}\n\nfile_path: {}\n\n{}{}\n",
            markdown_title(&file.path),
            single_line(&file.path),
            fence,
            code_fence_info(&file.path),
        ));

        document.push_str(&body);
        // The closing fence must start on its own line.
        if !document.ends_with('\n') {
            document.push('\n');
        }
        document.push_str(&fence);
        document.push_str("\n\n");
    }

    document
}

/// Serializes key/value pairs into a YAML front-matter block.
///
/// Every value is passed through `single_line` before serialization. The
/// result is the YAML mapping wrapped between `---` lines and followed by a
/// blank line.
///
/// # Panics
///
/// Panics if `serde_yaml` fails to serialize the mapping of string keys and
/// string values.
fn git_markdown_metadata(fields: &[(&str, String)]) -> String {
    let mapping: serde_yaml::Mapping = fields
        .iter()
        .map(|(key, value)| {
            (
                serde_yaml::Value::String((*key).to_owned()),
                serde_yaml::Value::String(single_line(value)),
            )
        })
        .collect();

    let yaml = serde_yaml::to_string(&serde_yaml::Value::Mapping(mapping))
        .expect("git frontmatter serializes");

    format!("---\n{yaml}---\n\n")
}

/// Derives the code-fence info string from a file path.
///
/// Takes the path's extension, keeps only ASCII alphanumerics, `_` and `-`,
/// and returns the result. Falls back to `"text"` when the path has no
/// extension, the extension is not valid UTF-8, or nothing survives the
/// filtering.
fn code_fence_info(path: &str) -> String {
    let mut info = Path::new(path)
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or_default()
        .to_owned();

    info.retain(|ch| ch.is_ascii_alphanumeric() || ch == '_' || ch == '-');

    if info.is_empty() {
        info.push_str("text");
    }
    info
}

/// Chooses a code-fence delimiter string for `text`.
///
/// Measures the longest (bounded) run of backticks and of tildes in `text`,
/// picks the delimiter with the shorter run (backticks on a tie), and returns
/// that delimiter repeated one more time than its longest run, with a minimum
/// length of three.
fn markdown_code_fence(text: &str) -> String {
    let backtick_run = bounded_max_run(text, '`');
    let tilde_run = bounded_max_run(text, '~');

    // Tildes are only used when they strictly beat backticks.
    let (delimiter, longest_run) = if tilde_run < backtick_run {
        ('~', tilde_run)
    } else {
        ('`', backtick_run)
    };

    let fence_len = longest_run.saturating_add(1).max(3);
    delimiter.to_string().repeat(fence_len)
}

fn bounded_max_run(text: &str, delimiter: char) -> usize {
    let mut max_run = 0usize;
    let mut current_run = 0usize;
    for ch in text.chars() {
        if ch == delimiter {
            current_run += 1;
            max_run = max_run.max(current_run);
            if max_run + 1 >= MAX_CODE_FENCE_LEN {
                return MAX_CODE_FENCE_LEN - 1;
            }
        } else {
            current_run = 0;
        }
    }
    max_run
}

// Unit tests for git repository ingest and code-fence selection.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sources::{SourceKind, SourceManifest};
    use crate::store::MemoryWikiStore;

    // End-to-end check: ingesting a two-file snapshot writes raw Markdown with
    // the expected front matter and file sections, and records exactly one
    // manifest entry carrying the git location and fetch timestamp.
    #[test]
    fn git_ingest_records_commit_provenance() {
        let temp = tempfile::tempdir().expect("tempdir");
        let snapshot = GitRepositorySnapshot {
            remote_url: "https://github.com/GobbyAI/example.git".to_string(),
            commit_sha: "7f83b1657ff1fc53b92dc18148a1d65dfa135adb".to_string(),
            fetched_at: "2026-05-29T18:20:00Z".to_string(),
            files: vec![
                GitFileSnapshot {
                    path: "README.md".to_string(),
                    bytes: b"# Example\n\nRepository notes.\n".to_vec(),
                },
                GitFileSnapshot {
                    path: "src/lib.rs".to_string(),
                    bytes: b"pub fn answer() -> u8 { 42 }\n".to_vec(),
                },
            ],
        };
        let mut store = MemoryWikiStore::default();

        let result =
            ingest_repository(temp.path(), &mut store, snapshot).expect("ingest git repository");

        // The raw document is read back from the path reported by the ingest result.
        let raw = std::fs::read_to_string(temp.path().join(&result.raw_path))
            .expect("raw markdown written");
        assert!(raw.contains("# https://github.com/GobbyAI/example.git"));
        // Isolate the YAML between the leading `---` line and the closing one.
        let frontmatter = raw
            .strip_prefix("---\n")
            .and_then(|rest| rest.split_once("\n---\n"))
            .map(|(frontmatter, _)| frontmatter)
            .expect("frontmatter block");
        let frontmatter: serde_yaml::Value =
            serde_yaml::from_str(frontmatter).expect("parse frontmatter");
        assert_eq!(frontmatter["source_kind"], "git_repository");
        assert_eq!(
            frontmatter["git_remote"],
            "https://github.com/GobbyAI/example.git"
        );
        assert_eq!(
            frontmatter["git_commit"],
            "7f83b1657ff1fc53b92dc18148a1d65dfa135adb"
        );
        // Each file contributes its path line and its text to the document.
        assert!(raw.contains("file_path: README.md"));
        assert!(raw.contains("file_path: src/lib.rs"));
        assert!(raw.contains("Repository notes."));
        assert!(raw.contains("pub fn answer() -> u8 { 42 }"));

        // The manifest holds a single entry for the whole repository snapshot.
        let manifest = SourceManifest::read(temp.path()).expect("read source manifest");
        assert_eq!(manifest.entries.len(), 1);
        let entry = &manifest.entries[0];
        assert_eq!(entry.kind, SourceKind::GitRepository);
        assert_eq!(
            entry.canonical_location,
            "git+https://github.com/GobbyAI/example.git@7f83b1657ff1fc53b92dc18148a1d65dfa135adb"
        );
        assert_eq!(entry.fetched_at, "2026-05-29T18:20:00Z");
    }

    // When only one delimiter appears in long runs, the fence switches to the
    // other delimiter and stays at the minimum length of three.
    #[test]
    fn code_fence_length_is_bounded_by_switching_delimiters() {
        let text = "`".repeat(MAX_CODE_FENCE_LEN * 4);

        assert_eq!(markdown_code_fence(&text), "~~~");
        assert_eq!(
            markdown_code_fence(&"~".repeat(MAX_CODE_FENCE_LEN * 4)).len(),
            3
        );
    }

    // When both delimiters exceed the cap, the runs tie at the bound, so the
    // fence uses backticks and is exactly MAX_CODE_FENCE_LEN characters long.
    #[test]
    fn code_fence_length_is_clamped_when_both_delimiters_are_saturated() {
        let text = format!(
            "{}\n{}",
            "`".repeat(MAX_CODE_FENCE_LEN * 4),
            "~".repeat(MAX_CODE_FENCE_LEN * 5)
        );

        let fence = markdown_code_fence(&text);

        assert!(fence.chars().all(|ch| ch == '`'));
        assert_eq!(fence.len(), MAX_CODE_FENCE_LEN);
    }
}