use std::collections::BTreeMap;
use std::fmt;
use std::path::{Path, PathBuf};
use postgres::Transaction;
use serde_json::json;
use crate::models::WikiScope;
/// Name of the environment variable read by `configured_memory_index_limit_bytes`.
pub const MAX_MEMORY_INDEX_BYTES_ENV: &str = "GWIKI_MAX_MEMORY_INDEX_BYTES";
/// Maximum byte length of an identifier returned by `cap_scoped_id_with_hash`.
const MAX_ID_LEN: usize = 63;
/// Number of leading hash bytes appended to an identifier that had to be shortened.
const HASH_SUFFIX_LEN: usize = 12;
/// Category of an indexed wiki document.
///
/// `document_kind_name` maps each variant to the text written to the
/// `source_kind` columns.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WikiDocumentKind {
/// Persisted as `source_catalog`.
SourceCatalog,
/// Persisted as `source_note`.
SourceNote,
/// Persisted as `concept`.
Concept,
/// Persisted as `topic`.
Topic,
/// Persisted as `code_doc`.
CodeDoc,
}
/// A wiki document handed to [`WikiIndexStore::upsert_document`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WikiDocument {
/// Path of the document; stores key the document by this path.
pub path: PathBuf,
/// Category of the document, persisted through `document_kind_name`.
pub kind: WikiDocumentKind,
/// Optional title, stored as-is.
pub title: Option<String>,
/// Hash recorded alongside the document.
/// NOTE(review): presumably a digest of `body` — confirm against the producer.
pub content_hash: String,
/// Full document text.
pub body: String,
}
/// One chunk of a document, handed to [`WikiIndexStore::replace_chunks`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WikiChunk {
/// Path of the owning document; must match the path passed to `replace_chunks`.
pub path: PathBuf,
/// Position of the chunk; the PostgreSQL store requires it to fit in an `i32`.
pub chunk_index: usize,
/// Recorded in the chunk's provenance JSON.
/// NOTE(review): presumably a byte offset into the document body — confirm with the chunker.
pub byte_start: usize,
/// Recorded in the chunk's provenance JSON (see `byte_start`).
pub byte_end: usize,
/// Optional heading; the PostgreSQL store writes it as a zero- or one-element `heading_path`.
pub heading: Option<String>,
/// Text of the chunk.
pub content: String,
}
/// A link found in a document, handed to [`WikiIndexStore::replace_links`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WikiLink {
/// Path of the document containing the link; must match the path passed to `replace_links`.
pub path: PathBuf,
/// Link target; classified as `markdown` or `wiki` by `link_kind`.
pub target: String,
/// Optional display text; when absent the PostgreSQL store uses `target` as the link text.
pub alias: Option<String>,
/// Recorded in the link's provenance JSON.
/// NOTE(review): presumably a byte offset into the document body — confirm with the parser.
pub byte_start: usize,
/// Recorded in the link's provenance JSON (see `byte_start`).
pub byte_end: usize,
}
/// Association between a source path and a wiki document, handed to
/// [`WikiIndexStore::upsert_source`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WikiSource {
/// Path of the source itself (written to the `path` column).
pub path: PathBuf,
/// Path of the wiki document; both stores key the source by this path.
pub document_path: PathBuf,
/// Category, persisted through `document_kind_name`.
pub kind: WikiDocumentKind,
/// Hash recorded alongside the source.
pub content_hash: String,
}
/// Outcome recorded for a path during ingestion.
///
/// `ingestion_status` maps each variant to the text written to the `status` column.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WikiIngestionEvent {
/// Persisted as `added`.
Added,
/// Persisted as `changed`.
Changed,
/// Persisted as `deleted`.
Deleted,
/// Persisted as `unchanged`.
Unchanged,
/// Persisted as `skipped`.
Skipped,
}
/// A single ingestion record, handed to [`WikiIndexStore::record_ingestion`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WikiIngestion {
/// Path the event applies to.
pub path: PathBuf,
/// What happened to the path.
pub event: WikiIngestionEvent,
/// Hash associated with the event, if any.
pub content_hash: Option<String>,
}
/// Scope under which a wiki store reads and writes its rows.
///
/// Wraps a [`WikiScope`] and exposes the pieces the stores need: the scope
/// kind, the scope identity, and the optional project id / topic name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WikiStoreScope {
    scope: WikiScope,
}

impl WikiStoreScope {
    /// Builds a scope backed by `WikiScope::Project` for `project_id`.
    pub fn project(project_id: impl Into<String>) -> Self {
        let project_id = project_id.into();
        let scope = WikiScope::Project { project_id };
        Self { scope }
    }

    /// Builds a scope backed by `WikiScope::Topic` for `topic_name`.
    pub fn topic(topic_name: impl Into<String>) -> Self {
        let name = topic_name.into();
        let scope = WikiScope::Topic { name };
        Self { scope }
    }

    /// Returns the wrapped scope's kind, as reported by `WikiScope::kind`.
    pub fn scope_kind(&self) -> &str {
        let inner = &self.scope;
        inner.kind()
    }

    /// Returns the wrapped scope's identity, as reported by `WikiScope::identity`.
    pub fn scope_id(&self) -> &str {
        let inner = &self.scope;
        inner.identity()
    }

    /// Owned copy of the wrapped scope's project id, when it reports one.
    fn project_id(&self) -> Option<String> {
        self.scope.project_id().map(|id| id.to_owned())
    }

    /// Owned copy of the wrapped scope's topic name, when it reports one.
    fn topic_name(&self) -> Option<String> {
        self.scope.topic_name().map(|name| name.to_owned())
    }
}
/// Errors reported by [`WikiIndexStore`] implementations.
#[derive(Debug)]
pub enum StoreError {
    /// The caller supplied data the store cannot accept.
    InvalidData {
        /// Name of the offending field.
        field: &'static str,
        /// Human-readable description of the problem.
        message: String,
    },
    /// A PostgreSQL operation failed; carries the rendered driver error.
    Postgres(String),
}

impl fmt::Display for StoreError {
    /// Renders the error as a single line of text.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered = match self {
            Self::InvalidData { field, message } => format!("invalid {field}: {message}"),
            Self::Postgres(message) => {
                format!("PostgreSQL wiki index write failed: {message}")
            }
        };
        f.write_str(&rendered)
    }
}

impl std::error::Error for StoreError {}
impl From<postgres::Error> for StoreError {
    /// Wraps a driver error, keeping only its rendered message.
    fn from(error: postgres::Error) -> Self {
        let message = error.to_string();
        Self::Postgres(message)
    }
}
/// Storage backend for the derived wiki index: documents, chunks, links,
/// sources, ingestion records and per-file content hashes.
///
/// Implemented in this file by `MemoryWikiStore` and `PostgresWikiStore`.
pub trait WikiIndexStore {
/// Returns the content hash currently recorded for each indexed path.
fn indexed_hashes(&mut self) -> Result<BTreeMap<PathBuf, String>, StoreError>;
/// Inserts the document, or replaces the one already stored under `document.path`.
fn upsert_document(&mut self, document: WikiDocument) -> Result<(), StoreError>;
/// Replaces every chunk stored for `path` with `chunks`.
///
/// Both implementations in this file reject a chunk whose `path` differs from
/// `path` with `StoreError::InvalidData`.
fn replace_chunks(&mut self, path: &Path, chunks: Vec<WikiChunk>) -> Result<(), StoreError>;
/// Replaces every link stored for `path` with `links`.
///
/// Both implementations in this file reject a link whose `path` differs from
/// `path` with `StoreError::InvalidData`.
fn replace_links(&mut self, path: &Path, links: Vec<WikiLink>) -> Result<(), StoreError>;
/// Inserts the source, or replaces the one stored under `source.document_path`.
fn upsert_source(&mut self, source: WikiSource) -> Result<(), StoreError>;
/// Records an ingestion event.
fn record_ingestion(&mut self, ingestion: WikiIngestion) -> Result<(), StoreError>;
/// Records the content hash of a file.
///
/// The PostgreSQL implementation in this file is a no-op.
fn record_file_hash(&mut self, path: PathBuf, content_hash: String) -> Result<(), StoreError>;
/// Removes the document, chunks, links and source stored for `path`.
fn delete_derived_rows(&mut self, path: &Path) -> Result<(), StoreError>;
}
/// In-memory [`WikiIndexStore`] that keeps everything in public collections
/// and counts how often each write operation ran.
#[derive(Debug, Default)]
pub struct MemoryWikiStore {
    /// Documents keyed by document path.
    pub documents: BTreeMap<PathBuf, WikiDocument>,
    /// Chunks keyed by document path.
    pub chunks: BTreeMap<PathBuf, Vec<WikiChunk>>,
    /// Links keyed by document path.
    pub links: BTreeMap<PathBuf, Vec<WikiLink>>,
    /// Sources keyed by `WikiSource::document_path`.
    pub sources: BTreeMap<PathBuf, WikiSource>,
    /// Content hashes recorded through `record_file_hash`.
    pub file_hashes: BTreeMap<PathBuf, String>,
    /// Every ingestion recorded, in call order.
    pub ingestions: Vec<WikiIngestion>,
    /// Every path passed to `delete_derived_rows`, in call order.
    pub deleted_paths: Vec<PathBuf>,
    /// Number of `upsert_document` calls.
    pub document_upserts: usize,
    /// Number of successful `replace_chunks` calls.
    pub chunk_replacements: usize,
    /// Number of successful `replace_links` calls.
    pub link_replacements: usize,
    /// Number of `upsert_source` calls.
    pub source_upserts: usize,
}

impl WikiIndexStore for MemoryWikiStore {
    /// Returns a copy of the recorded file hashes.
    fn indexed_hashes(&mut self) -> Result<BTreeMap<PathBuf, String>, StoreError> {
        let snapshot = self.file_hashes.clone();
        Ok(snapshot)
    }

    /// Stores the document under its own path and bumps the upsert counter.
    fn upsert_document(&mut self, document: WikiDocument) -> Result<(), StoreError> {
        let key = document.path.clone();
        self.documents.insert(key, document);
        self.document_upserts += 1;
        Ok(())
    }

    /// Replaces the chunks for `path`; nothing changes if any chunk path mismatches.
    fn replace_chunks(&mut self, path: &Path, chunks: Vec<WikiChunk>) -> Result<(), StoreError> {
        validate_chunk_paths(path, &chunks)?;
        let key = path.to_path_buf();
        self.chunks.insert(key, chunks);
        self.chunk_replacements += 1;
        Ok(())
    }

    /// Replaces the links for `path`; nothing changes if any link path mismatches.
    fn replace_links(&mut self, path: &Path, links: Vec<WikiLink>) -> Result<(), StoreError> {
        validate_link_paths(path, &links)?;
        let key = path.to_path_buf();
        self.links.insert(key, links);
        self.link_replacements += 1;
        Ok(())
    }

    /// Stores the source under its document path and bumps the upsert counter.
    fn upsert_source(&mut self, source: WikiSource) -> Result<(), StoreError> {
        let key = source.document_path.clone();
        self.sources.insert(key, source);
        self.source_upserts += 1;
        Ok(())
    }

    /// Appends the ingestion to the log.
    fn record_ingestion(&mut self, ingestion: WikiIngestion) -> Result<(), StoreError> {
        self.ingestions.push(ingestion);
        Ok(())
    }

    /// Records (or overwrites) the content hash for `path`.
    fn record_file_hash(&mut self, path: PathBuf, content_hash: String) -> Result<(), StoreError> {
        self.file_hashes.insert(path, content_hash);
        Ok(())
    }

    /// Drops everything stored for `path` and remembers the path as deleted.
    fn delete_derived_rows(&mut self, path: &Path) -> Result<(), StoreError> {
        self.documents.remove(path);
        self.chunks.remove(path);
        self.links.remove(path);
        self.sources.remove(path);
        self.file_hashes.remove(path);
        self.deleted_paths.push(path.to_path_buf());
        Ok(())
    }
}
/// Per-document values cached by `PostgresWikiStore` so chunk and ingestion
/// rows can be written without re-reading the document row.
#[derive(Debug, Clone)]
struct DocumentMeta {
// Value of the `id` column in `gwiki_documents`.
id: String,
// Value of the `source_kind` column in `gwiki_documents`.
source_kind: String,
// Value of the `content_hash` column in `gwiki_documents`.
content_hash: String,
}
/// [`WikiIndexStore`] backed by PostgreSQL tables (`gwiki_documents`,
/// `gwiki_chunks`, `gwiki_links`, `gwiki_sources`, `gwiki_ingestions`).
///
/// All rows are written under the store's scope. Document metadata is cached
/// in memory for the lifetime of the store.
pub struct PostgresWikiStore<'a> {
    conn: &'a mut postgres::Client,
    scope: WikiStoreScope,
    documents: BTreeMap<PathBuf, DocumentMeta>,
}

impl<'a> PostgresWikiStore<'a> {
    /// Creates a store that uses `conn` and writes rows under `scope`.
    pub fn new(conn: &'a mut postgres::Client, scope: WikiStoreScope) -> Self {
        let documents = BTreeMap::new();
        Self {
            conn,
            scope,
            documents,
        }
    }

    /// Owned scope values in column order: kind, id, project id, topic name.
    fn scope_params(&self) -> (String, String, Option<String>, Option<String>) {
        let scope = &self.scope;
        let kind = scope.scope_kind().to_string();
        let identity = scope.scope_id().to_string();
        (kind, identity, scope.project_id(), scope.topic_name())
    }

    /// Returns the metadata for the document at `path`.
    ///
    /// Served from the cache when possible; otherwise read from
    /// `gwiki_documents` and cached.
    ///
    /// # Errors
    /// `StoreError::InvalidData` when no document row exists for `path` in
    /// this scope, `StoreError::Postgres` when the query fails.
    fn document_meta(&mut self, path: &Path) -> Result<DocumentMeta, StoreError> {
        if let Some(cached) = self.documents.get(path).cloned() {
            return Ok(cached);
        }

        let stored_path = display_path(path);
        let scope_kind = self.scope.scope_kind();
        let scope_id = self.scope.scope_id();
        let maybe_row = self.conn.query_opt(
            "SELECT id, source_kind, content_hash
FROM gwiki_documents
WHERE scope_kind = $1 AND scope_id = $2 AND path = $3",
            &[&scope_kind, &scope_id, &stored_path],
        )?;

        let Some(row) = maybe_row else {
            return Err(StoreError::InvalidData {
                field: "document",
                message: format!("missing indexed document for {}", path.display()),
            });
        };

        let meta = DocumentMeta {
            id: row.get("id"),
            source_kind: row.get("source_kind"),
            content_hash: row.get("content_hash"),
        };
        self.documents.insert(path.to_path_buf(), meta.clone());
        Ok(meta)
    }
}
impl WikiIndexStore for PostgresWikiStore<'_> {
/// Reads `path` and `content_hash` for every document row in this scope.
///
/// Stored paths are converted back to platform paths with
/// `platform_path_from_display`.
fn indexed_hashes(&mut self) -> Result<BTreeMap<PathBuf, String>, StoreError> {
    let scope_kind = self.scope.scope_kind();
    let scope_id = self.scope.scope_id();
    let rows = self.conn.query(
        "SELECT path, content_hash
FROM gwiki_documents
WHERE scope_kind = $1 AND scope_id = $2",
        &[&scope_kind, &scope_id],
    )?;

    let mut hashes = BTreeMap::new();
    for row in rows {
        let stored_path: String = row.get("path");
        let content_hash: String = row.get("content_hash");
        hashes.insert(platform_path_from_display(&stored_path), content_hash);
    }
    Ok(hashes)
}
fn upsert_document(&mut self, document: WikiDocument) -> Result<(), StoreError> {
let id = scoped_id("document", &self.scope, &document.path, None);
let path = display_path(&document.path);
let source_kind = document_kind_name(document.kind);
let provenance = json!({ "source_path": path }).to_string();
let frontmatter = "{}";
let (scope_kind, scope_id, project_id, topic_name) = self.scope_params();
self.conn.execute(
"INSERT INTO gwiki_documents (
id, scope_kind, scope_id, project_id, topic_name, path, title, source_kind,
content_hash, frontmatter, provenance, body, indexed_at, updated_at
)
VALUES (
$1, $2, $3, $4, $5, $6, $7, $8,
$9, $10::jsonb, $11::jsonb, $12, NOW(), NOW()
)
ON CONFLICT (scope_kind, scope_id, path)
DO UPDATE SET
id = EXCLUDED.id,
project_id = EXCLUDED.project_id,
topic_name = EXCLUDED.topic_name,
title = EXCLUDED.title,
source_kind = EXCLUDED.source_kind,
content_hash = EXCLUDED.content_hash,
frontmatter = EXCLUDED.frontmatter,
provenance = EXCLUDED.provenance,
body = EXCLUDED.body,
indexed_at = NOW(),
updated_at = NOW()",
&[
&id,
&scope_kind,
&scope_id,
&project_id,
&topic_name,
&path,
&document.title,
&source_kind,
&document.content_hash,
&frontmatter,
&provenance,
&document.body,
],
)?;
self.documents.insert(
document.path,
DocumentMeta {
id,
source_kind: source_kind.to_string(),
content_hash: document.content_hash,
},
);
Ok(())
}
fn replace_chunks(&mut self, path: &Path, chunks: Vec<WikiChunk>) -> Result<(), StoreError> {
validate_chunk_paths(path, &chunks)?;
let document = self.document_meta(path)?;
let path_string = display_path(path);
let scope = self.scope.clone();
let chunks = chunks
.into_iter()
.map(|chunk| {
let chunk_index =
i32::try_from(chunk.chunk_index).map_err(|_| StoreError::InvalidData {
field: "chunk_index",
message: format!(
"{} is too large for PostgreSQL INTEGER",
chunk.chunk_index
),
})?;
Ok((chunk, chunk_index))
})
.collect::<Result<Vec<_>, StoreError>>()?;
let mut tx = self.conn.transaction()?;
if let Err(error) = tx.execute(
"DELETE FROM gwiki_chunks WHERE scope_kind = $1 AND scope_id = $2 AND path = $3",
&[&scope.scope_kind(), &scope.scope_id(), &path_string],
) {
let error = StoreError::from(error);
rollback_chunk_replacement(tx, &path_string);
return Err(error);
}
if chunks.is_empty() {
tx.commit()?;
return Ok(());
}
for (chunk, chunk_index) in chunks {
let chunk_path = display_path(&chunk.path);
let id = scoped_id(
"chunk",
&scope,
&chunk.path,
Some(&chunk.chunk_index.to_string()),
);
let heading_path = chunk
.heading
.as_ref()
.map(|heading| vec![heading.clone()])
.unwrap_or_default();
let provenance = json!({
"source_path": chunk_path,
"byte_start": chunk.byte_start,
"byte_end": chunk.byte_end,
"heading": chunk.heading,
})
.to_string();
let frontmatter = "{}";
let (scope_kind, scope_id, project_id, topic_name) = (
scope.scope_kind().to_string(),
scope.scope_id().to_string(),
scope.project_id(),
scope.topic_name(),
);
if let Err(error) = tx.execute(
"INSERT INTO gwiki_chunks (
id, document_id, scope_kind, scope_id, project_id, topic_name, path,
chunk_index, source_kind, content_hash, frontmatter, provenance,
heading_path, content, created_at
)
VALUES (
$1, $2, $3, $4, $5, $6, $7,
$8, $9, $10, $11::jsonb, $12::jsonb,
$13, $14, NOW()
)",
&[
&id,
&document.id,
&scope_kind,
&scope_id,
&project_id,
&topic_name,
&chunk_path,
&chunk_index,
&document.source_kind,
&document.content_hash,
&frontmatter,
&provenance,
&heading_path,
&chunk.content,
],
) {
let error = StoreError::from(error);
rollback_chunk_replacement(tx, &path_string);
return Err(error);
}
}
tx.commit()?;
Ok(())
}
fn replace_links(&mut self, path: &Path, links: Vec<WikiLink>) -> Result<(), StoreError> {
validate_link_paths(path, &links)?;
let path_string = display_path(path);
let scope = self.scope.clone();
let mut tx = self.conn.transaction()?;
if let Err(error) = tx.execute(
"DELETE FROM gwiki_links WHERE scope_kind = $1 AND scope_id = $2 AND path = $3",
&[&scope.scope_kind(), &scope.scope_id(), &path_string],
) {
let error = StoreError::from(error);
rollback_link_replacement(tx, &path_string);
return Err(error);
}
for link in links {
let target_path = link.target.clone();
let link_text = link.alias.clone().unwrap_or_else(|| link.target.clone());
let link_kind = link_kind(&link.target);
let id = scoped_text_id(
"link",
&scope,
&link.path,
&[&target_path, &link_text, link_kind],
);
let path = display_path(&link.path);
let provenance = json!({
"byte_start": link.byte_start,
"byte_end": link.byte_end,
"alias": link.alias,
})
.to_string();
let (scope_kind, scope_id, project_id, topic_name) = (
scope.scope_kind().to_string(),
scope.scope_id().to_string(),
scope.project_id(),
scope.topic_name(),
);
if let Err(error) = tx.execute(
"INSERT INTO gwiki_links (
id, scope_kind, scope_id, project_id, topic_name, path,
target_path, link_text, link_kind, provenance, created_at
)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10::jsonb, NOW())
ON CONFLICT (scope_kind, scope_id, path, target_path, link_text, link_kind)
DO UPDATE SET
id = EXCLUDED.id,
project_id = EXCLUDED.project_id,
topic_name = EXCLUDED.topic_name,
provenance = EXCLUDED.provenance",
&[
&id,
&scope_kind,
&scope_id,
&project_id,
&topic_name,
&path,
&target_path,
&link_text,
&link_kind,
&provenance,
],
) {
let error = StoreError::from(error);
rollback_link_replacement(tx, &path_string);
return Err(error);
}
}
tx.commit().map_err(StoreError::from)
}
fn upsert_source(&mut self, source: WikiSource) -> Result<(), StoreError> {
let id = scoped_id("source", &self.scope, &source.document_path, None);
let path = display_path(&source.path);
let document_path = display_path(&source.document_path);
let source_kind = document_kind_name(source.kind);
let provenance = json!({
"source_path": &path,
"document_path": &document_path,
})
.to_string();
let frontmatter = "{}";
let (scope_kind, scope_id, project_id, topic_name) = self.scope_params();
self.conn.execute(
"INSERT INTO gwiki_sources (
id, scope_kind, scope_id, project_id, topic_name, path, document_path, source_kind,
content_hash, frontmatter, provenance, captured_at
)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10::jsonb, $11::jsonb, NOW())
ON CONFLICT (scope_kind, scope_id, document_path)
DO UPDATE SET
id = EXCLUDED.id,
project_id = EXCLUDED.project_id,
topic_name = EXCLUDED.topic_name,
path = EXCLUDED.path,
source_kind = EXCLUDED.source_kind,
content_hash = EXCLUDED.content_hash,
frontmatter = EXCLUDED.frontmatter,
provenance = EXCLUDED.provenance,
captured_at = NOW()",
&[
&id,
&scope_kind,
&scope_id,
&project_id,
&topic_name,
&path,
&document_path,
&source_kind,
&source.content_hash,
&frontmatter,
&provenance,
],
)?;
Ok(())
}
fn record_ingestion(&mut self, ingestion: WikiIngestion) -> Result<(), StoreError> {
let content_hash = ingestion.content_hash.clone();
let status = ingestion_status(ingestion.event);
let id = scoped_text_id("ingestion", &self.scope, &ingestion.path, &[status]);
let path = display_path(&ingestion.path);
let source_kind = self
.documents
.get(&ingestion.path)
.map(|document| document.source_kind.as_str())
.unwrap_or("unknown");
let provenance = json!({ "event": status }).to_string();
let frontmatter = "{}";
let (scope_kind, scope_id, project_id, topic_name) = self.scope_params();
self.conn.execute(
"INSERT INTO gwiki_ingestions (
id, scope_kind, scope_id, project_id, topic_name, path, source_kind,
content_hash, frontmatter, provenance, status, ingested_at
)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9::jsonb, $10::jsonb, $11, NOW())
ON CONFLICT (id)
DO UPDATE SET
project_id = EXCLUDED.project_id,
topic_name = EXCLUDED.topic_name,
source_kind = EXCLUDED.source_kind,
content_hash = EXCLUDED.content_hash,
frontmatter = EXCLUDED.frontmatter,
provenance = EXCLUDED.provenance,
status = EXCLUDED.status,
ingested_at = NOW()",
&[
&id,
&scope_kind,
&scope_id,
&project_id,
&topic_name,
&path,
&source_kind,
&content_hash,
&frontmatter,
&provenance,
&status,
],
)?;
Ok(())
}
/// Intentionally does nothing and always succeeds.
///
/// This store's `indexed_hashes` reads `content_hash` from `gwiki_documents`,
/// which `upsert_document` writes, so there is no separate hash table to update.
fn record_file_hash(
&mut self,
_path: PathBuf,
_content_hash: String,
) -> Result<(), StoreError> {
Ok(())
}
/// Deletes the chunk, link, source and document rows stored for `path` in this
/// scope, inside one transaction, then evicts the cached document metadata.
///
/// # Errors
/// `StoreError::Postgres` when starting the transaction, any delete, or the
/// commit fails. In that case the cache is left untouched.
fn delete_derived_rows(&mut self, path: &Path) -> Result<(), StoreError> {
    // Dependent rows first, the document row last.
    const STATEMENTS: [&str; 4] = [
        "DELETE FROM gwiki_chunks WHERE scope_kind = $1 AND scope_id = $2 AND path = $3",
        "DELETE FROM gwiki_links WHERE scope_kind = $1 AND scope_id = $2 AND path = $3",
        "DELETE FROM gwiki_sources WHERE scope_kind = $1 AND scope_id = $2 AND document_path = $3",
        "DELETE FROM gwiki_documents WHERE scope_kind = $1 AND scope_id = $2 AND path = $3",
    ];

    let path_string = display_path(path);
    let scope_kind = self.scope.scope_kind().to_string();
    let scope_id = self.scope.scope_id().to_string();

    let mut tx = self.conn.transaction()?;
    let params: [&(dyn postgres::types::ToSql + Sync); 3] =
        [&scope_kind, &scope_id, &path_string];
    // An early return via `?` drops `tx` without committing, which discards
    // the deletes already issued (see the postgres `Transaction` docs).
    for statement in STATEMENTS {
        tx.execute(statement, &params)?;
    }
    tx.commit()?;

    // The cache is keyed by the path the caller originally supplied, so evict
    // that key. Also evict the round-tripped form: on platforms where `\` is
    // not a separator the two can differ, and the original code removed only
    // the round-tripped one.
    self.documents.remove(path);
    self.documents
        .remove(&platform_path_from_display(&path_string));
    Ok(())
}
}
/// Renders `path` as text with every backslash replaced by a forward slash.
///
/// Invalid UTF-8 is replaced lossily, as done by `Path::to_string_lossy`.
fn display_path(path: &Path) -> String {
    let text = path.to_string_lossy();
    text.chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
/// Checks that every chunk belongs to `path`.
///
/// Stops at the first mismatch and returns it as
/// `StoreError::InvalidData { field: "chunk.path", .. }`.
fn validate_chunk_paths(path: &Path, chunks: &[WikiChunk]) -> Result<(), StoreError> {
    chunks
        .iter()
        .try_for_each(|chunk| validate_matching_path("chunk.path", path, &chunk.path))
}
/// Checks that every link belongs to `path`.
///
/// Stops at the first mismatch and returns it as
/// `StoreError::InvalidData { field: "link.path", .. }`.
fn validate_link_paths(path: &Path, links: &[WikiLink]) -> Result<(), StoreError> {
    links
        .iter()
        .try_for_each(|link| validate_matching_path("link.path", path, &link.path))
}
/// Succeeds when `found` and `expected` have the same display form.
///
/// # Errors
/// `StoreError::InvalidData` tagged with `field`, naming both display paths.
fn validate_matching_path(
    field: &'static str,
    expected: &Path,
    found: &Path,
) -> Result<(), StoreError> {
    match equivalent_display_path(expected, found) {
        true => Ok(()),
        false => {
            let message = format!(
                "expected {}, found {}",
                display_path(expected),
                display_path(found)
            );
            Err(StoreError::InvalidData { field, message })
        }
    }
}
/// Whether two paths have the same display form, i.e. are equal once
/// backslashes are treated as forward slashes.
fn equivalent_display_path(left: &Path, right: &Path) -> bool {
    let lhs = display_path(left);
    let rhs = display_path(right);
    lhs == rhs
}
/// Converts a stored display path (forward slashes) into a `PathBuf` that uses
/// the platform's main separator.
fn platform_path_from_display(path: &str) -> PathBuf {
    match std::path::MAIN_SEPARATOR {
        // Display form already matches the platform form.
        '/' => PathBuf::from(path),
        _ => {
            let native = path.replace('/', std::path::MAIN_SEPARATOR_STR);
            PathBuf::from(native)
        }
    }
}
/// Builds a scoped identifier with at most one trailing suffix.
///
/// Convenience wrapper around `scoped_text_id`.
fn scoped_id(prefix: &str, scope: &WikiStoreScope, path: &Path, suffix: Option<&str>) -> String {
    let suffixes: &[&str] = match &suffix {
        Some(value) => std::slice::from_ref(value),
        None => &[],
    };
    scoped_text_id(prefix, scope, path, suffixes)
}
/// Builds `prefix:scope_kind:scope_id:display_path[:suffix…]` and caps its
/// length with `cap_scoped_id`.
fn scoped_text_id(prefix: &str, scope: &WikiStoreScope, path: &Path, suffixes: &[&str]) -> String {
    let base = format!(
        "{prefix}:{}:{}:{}",
        scope.scope_kind(),
        scope.scope_id(),
        display_path(path)
    );
    let full = suffixes.iter().fold(base, |mut acc, suffix| {
        acc.push(':');
        acc.push_str(suffix);
        acc
    });
    cap_scoped_id(full)
}
/// Caps `id` with `cap_scoped_id_with_hash`, using the content hash of the
/// full identifier's bytes as the hash.
fn cap_scoped_id(id: String) -> String {
    let digest = gobby_core::indexing::content_hash(id.as_bytes());
    cap_scoped_id_with_hash(id, &digest)
}
/// Returns `id` unchanged when it fits in `MAX_ID_LEN` bytes. Otherwise returns
/// a truncated prefix of `id`, a `-`, and the first `HASH_SUFFIX_LEN` bytes of
/// `hash` (all of `hash` when it is shorter).
///
/// The prefix is cut on a character boundary so the result stays valid UTF-8.
fn cap_scoped_id_with_hash(id: String, hash: &str) -> String {
    if id.len() <= MAX_ID_LEN {
        return id;
    }

    let suffix = &hash[..hash.len().min(HASH_SUFFIX_LEN)];
    // Bytes left for the prefix after reserving room for "-" and the suffix.
    let budget = MAX_ID_LEN.saturating_sub(suffix.len()).saturating_sub(1);
    // End offset of the last whole character that still fits in the budget.
    let cut = id
        .char_indices()
        .map(|(start, ch)| start + ch.len_utf8())
        .take_while(|&end| end <= budget)
        .last()
        .unwrap_or(0);
    let prefix = &id[..cut];
    format!("{prefix}-{suffix}")
}
/// Text stored in `source_kind` columns for each document kind.
fn document_kind_name(kind: WikiDocumentKind) -> &'static str {
    let name: &'static str = match kind {
        WikiDocumentKind::CodeDoc => "code_doc",
        WikiDocumentKind::Topic => "topic",
        WikiDocumentKind::Concept => "concept",
        WikiDocumentKind::SourceNote => "source_note",
        WikiDocumentKind::SourceCatalog => "source_catalog",
    };
    name
}
/// Text stored in the `status` column for each ingestion event.
fn ingestion_status(event: WikiIngestionEvent) -> &'static str {
    let status: &'static str = match event {
        WikiIngestionEvent::Skipped => "skipped",
        WikiIngestionEvent::Unchanged => "unchanged",
        WikiIngestionEvent::Deleted => "deleted",
        WikiIngestionEvent::Changed => "changed",
        WikiIngestionEvent::Added => "added",
    };
    status
}
/// Classifies a link target as `"markdown"` or `"wiki"`.
///
/// After trimming whitespace, a target is `"markdown"` when it starts with
/// `//`, starts with two backslashes, or begins with a URI scheme (see
/// `has_uri_scheme`). Everything else is `"wiki"`.
pub(crate) fn link_kind(target: &str) -> &'static str {
    let candidate = target.trim();
    let protocol_relative = candidate.starts_with("//");
    let double_backslash = candidate.starts_with("\\\\");
    match protocol_relative || double_backslash || has_uri_scheme(candidate) {
        true => "markdown",
        false => "wiki",
    }
}
/// Whether the text before the first `:` in `target` looks like a URI scheme:
/// an ASCII letter followed by any number of ASCII letters, digits, `+`, `.`
/// or `-`. Returns `false` when there is no `:` at all.
fn has_uri_scheme(target: &str) -> bool {
    match target.split_once(':') {
        None => false,
        Some((scheme, _)) => {
            let mut rest = scheme.chars();
            match rest.next() {
                Some(first) if first.is_ascii_alphabetic() => {
                    rest.all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '+' | '.' | '-'))
                }
                _ => false,
            }
        }
    }
}
/// Rolls back a link-replacement transaction. A rollback failure is logged and
/// otherwise ignored, because the caller is already returning the original error.
fn rollback_link_replacement(tx: Transaction<'_>, path: &str) {
    match tx.rollback() {
        Ok(()) => {}
        Err(error) => {
            log::error!("failed to rollback gwiki link replacement for {path}: {error}");
        }
    }
}
/// Rolls back a chunk-replacement transaction. A rollback failure is logged and
/// otherwise ignored, because the caller is already returning the original error.
fn rollback_chunk_replacement(tx: Transaction<'_>, path: &str) {
    match tx.rollback() {
        Ok(()) => {}
        Err(error) => {
            log::error!("failed to rollback gwiki chunk replacement for {path}: {error}");
        }
    }
}
/// Reads the memory-index byte limit from the environment variable named by
/// `MAX_MEMORY_INDEX_BYTES_ENV`.
///
/// Returns `None` when the variable cannot be read. Also returns `None`, after
/// printing a warning to stderr, when the value is not a positive `u64`.
pub fn configured_memory_index_limit_bytes() -> Option<u64> {
    let raw = std::env::var(MAX_MEMORY_INDEX_BYTES_ENV).ok()?;
    match raw.parse::<u64>() {
        Ok(limit) if limit > 0 => Some(limit),
        _ => {
            eprintln!("warning: ignoring invalid {MAX_MEMORY_INDEX_BYTES_ENV}={raw}");
            None
        }
    }
}
// Unit tests for the pure helpers and the in-memory store. Nothing here needs
// a PostgreSQL connection.
#[cfg(test)]
mod tests {
use super::*;
// URI-like and protocol-relative targets are "markdown"; fragments and plain
// page names are "wiki".
#[test]
fn link_kind_classifies_uri_schemes_and_fragments() {
assert_eq!(link_kind("https://example.test"), "markdown");
assert_eq!(link_kind("mailto:hello@example.test"), "markdown");
assert_eq!(link_kind("tel:+15551234567"), "markdown");
assert_eq!(link_kind("//example.test/path"), "markdown");
assert_eq!(link_kind("#local-section"), "wiki");
assert_eq!(link_kind("Concept Page"), "wiki");
}
// An over-long id is shortened to at most MAX_ID_LEN, is stable across calls,
// and ends in a hash suffix of HASH_SUFFIX_LEN bytes.
#[test]
fn scoped_ids_are_capped_with_deterministic_hash_suffix() {
let scope = WikiStoreScope::project("project-with-a-very-long-identifier");
let id = scoped_text_id(
"chunk",
&scope,
Path::new("wiki/topics/a-very-long-path-name-that-keeps-going.md"),
&["1234567890"],
);
let id_again = scoped_text_id(
"chunk",
&scope,
Path::new("wiki/topics/a-very-long-path-name-that-keeps-going.md"),
&["1234567890"],
);
assert!(id.len() <= MAX_ID_LEN);
assert_eq!(id, id_again);
assert!(
id.rsplit_once('-')
.is_some_and(|(_, suffix)| suffix.len() == HASH_SUFFIX_LEN)
);
}
// A hash shorter than HASH_SUFFIX_LEN is used whole instead of panicking on
// the slice.
#[test]
fn scoped_id_capping_tolerates_short_hashes() {
let id = cap_scoped_id_with_hash("x".repeat(MAX_ID_LEN + 20), "abc");
assert!(id.len() <= MAX_ID_LEN);
assert!(id.ends_with("-abc"));
}
// Chunks and links whose own path differs from the path being replaced are
// rejected with InvalidData naming the offending field.
#[test]
fn memory_store_rejects_path_mismatches() {
let mut store = MemoryWikiStore::default();
let err = store
.replace_chunks(
Path::new("wiki/topics/page.md"),
vec![WikiChunk {
path: PathBuf::from("wiki/topics/other.md"),
chunk_index: 0,
byte_start: 0,
byte_end: 4,
heading: None,
content: "body".to_string(),
}],
)
.expect_err("mismatched chunk path must fail");
assert!(matches!(
err,
StoreError::InvalidData {
field: "chunk.path",
..
}
));
let err = store
.replace_links(
Path::new("wiki/topics/page.md"),
vec![WikiLink {
path: PathBuf::from("wiki/topics/other.md"),
target: "Target".to_string(),
alias: None,
byte_start: 0,
byte_end: 8,
}],
)
.expect_err("mismatched link path must fail");
assert!(matches!(
err,
StoreError::InvalidData {
field: "link.path",
..
}
));
}
// Sources are keyed by the wiki document path, not by the raw source path.
#[test]
fn memory_store_keys_sources_by_document_path() {
let mut store = MemoryWikiStore::default();
let document_path = PathBuf::from("wiki/sources/example.md");
let source = WikiSource {
path: PathBuf::from("raw/example.md"),
document_path: document_path.clone(),
kind: WikiDocumentKind::SourceNote,
content_hash: "hash".to_string(),
};
store.upsert_source(source).expect("source upsert");
assert!(store.sources.contains_key(&document_path));
assert!(!store.sources.contains_key(Path::new("raw/example.md")));
}
}