systemprompt-content 0.2.2

Markdown content management, sources, and event tracking for systemprompt.io AI governance dashboards. Governed publishing pipeline for the MCP governance platform.
Documentation
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use systemprompt_identifiers::{CategoryId, SourceId};
use systemprompt_models::services::{ContentConfig, ServicesConfig};
use systemprompt_models::{
    Category, ContentConfigError, ContentConfigErrors, ContentConfigRaw, ContentRouting,
    ContentSourceConfigRaw, IndexingConfig, Metadata, SitemapConfig, SourceBranding,
};

const SOURCE_WEB: &str = "web";
const SOURCE_UNKNOWN: &str = "unknown";

#[derive(Debug, Clone)]
pub struct ContentConfigValidated {
    content_sources: HashMap<String, ContentSourceConfigValidated>,
    metadata: Metadata,
    categories: HashMap<String, Category>,
    base_path: PathBuf,
}

#[derive(Debug, Clone)]
pub struct ContentSourceConfigValidated {
    pub path: PathBuf,
    pub source_id: SourceId,
    pub category_id: CategoryId,
    pub enabled: bool,
    pub description: String,
    pub allowed_content_types: Vec<String>,
    pub indexing: IndexingConfig,
    pub sitemap: Option<SitemapConfig>,
    pub branding: Option<SourceBranding>,
}

pub type ValidationResult = Result<ContentConfigValidated, ContentConfigErrors>;

impl ContentConfigValidated {
    pub fn from_raw(raw: ContentConfigRaw, base_path: PathBuf) -> ValidationResult {
        let mut errors = ContentConfigErrors::new();

        let categories = validate_categories(&raw.categories, &mut errors);
        let content_sources = validate_sources(&raw, &categories, &base_path, &mut errors);

        errors.into_result(Self {
            content_sources,
            metadata: raw.metadata,
            categories,
            base_path,
        })
    }

    pub fn from_content_config(config: &ContentConfig, base_path: PathBuf) -> ValidationResult {
        Self::from_raw(config.raw.clone(), base_path)
    }

    pub fn from_services_config(services: &ServicesConfig, base_path: PathBuf) -> ValidationResult {
        Self::from_content_config(&services.content, base_path)
    }

    pub const fn content_sources(&self) -> &HashMap<String, ContentSourceConfigValidated> {
        &self.content_sources
    }

    pub const fn metadata(&self) -> &Metadata {
        &self.metadata
    }

    pub const fn categories(&self) -> &HashMap<String, Category> {
        &self.categories
    }

    pub const fn base_path(&self) -> &PathBuf {
        &self.base_path
    }

    pub fn is_html_page(&self, path: &str) -> bool {
        if path == "/" {
            return true;
        }

        self.content_sources
            .values()
            .filter(|source| source.enabled)
            .filter_map(|source| source.sitemap.as_ref())
            .filter(|sitemap| sitemap.enabled)
            .any(|sitemap| Self::matches_url_pattern(&sitemap.url_pattern, path))
    }

    fn matches_url_pattern(pattern: &str, path: &str) -> bool {
        let pattern_parts: Vec<&str> = pattern.split('/').filter(|s| !s.is_empty()).collect();
        let path_parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();

        if pattern_parts.len() != path_parts.len() {
            return false;
        }

        pattern_parts
            .iter()
            .zip(path_parts.iter())
            .all(|(pattern_part, path_part)| *pattern_part == "{slug}" || pattern_part == path_part)
    }

    pub fn determine_source(&self, path: &str) -> String {
        if path == "/" {
            return SOURCE_WEB.to_string();
        }

        self.content_sources
            .iter()
            .filter(|(_, source)| source.enabled)
            .find_map(|(name, source)| {
                source.sitemap.as_ref().and_then(|sitemap| {
                    (sitemap.enabled && Self::matches_url_pattern(&sitemap.url_pattern, path))
                        .then(|| name.clone())
                })
            })
            .unwrap_or_else(|| SOURCE_UNKNOWN.to_string())
    }

    pub fn resolve_slug(&self, path: &str) -> Option<String> {
        self.content_sources
            .values()
            .filter(|source| source.enabled)
            .filter_map(|source| source.sitemap.as_ref())
            .filter(|sitemap| sitemap.enabled)
            .find_map(|sitemap| extract_slug_from_pattern(path, &sitemap.url_pattern))
    }
}

fn extract_slug_from_pattern(path: &str, pattern: &str) -> Option<String> {
    let prefix = pattern.split('{').next()?;
    let raw = path.strip_prefix(prefix)?.trim_end_matches('/');
    let raw = raw.split('?').next().unwrap_or(raw);
    let raw = raw.split('#').next().unwrap_or(raw);
    (!raw.is_empty()).then(|| raw.to_string())
}

impl ContentRouting for ContentConfigValidated {
    fn is_html_page(&self, path: &str) -> bool {
        ContentConfigValidated::is_html_page(self, path)
    }

    fn determine_source(&self, path: &str) -> String {
        ContentConfigValidated::determine_source(self, path)
    }

    fn resolve_slug(&self, path: &str) -> Option<String> {
        ContentConfigValidated::resolve_slug(self, path)
    }
}

fn validate_categories(
    raw: &HashMap<String, Category>,
    errors: &mut ContentConfigErrors,
) -> HashMap<String, Category> {
    let mut validated = HashMap::new();

    for (id, cat) in raw {
        if cat.name.is_empty() {
            errors.push(ContentConfigError::Validation {
                field: format!("categories.{id}.name"),
                message: "Category name cannot be empty".to_string(),
                suggestion: Some("Provide a non-empty name".to_string()),
            });
            continue;
        }
        validated.insert(id.clone(), cat.clone());
    }

    validated
}

fn validate_sources(
    raw: &ContentConfigRaw,
    categories: &HashMap<String, Category>,
    base_path: &Path,
    errors: &mut ContentConfigErrors,
) -> HashMap<String, ContentSourceConfigValidated> {
    let mut validated = HashMap::new();

    for (name, source) in &raw.content_sources {
        if let Some(validated_source) =
            validate_single_source(name, source, categories, base_path, errors)
        {
            validated.insert(name.clone(), validated_source);
        }
    }

    validated
}

fn validate_single_source(
    name: &str,
    source: &ContentSourceConfigRaw,
    categories: &HashMap<String, Category>,
    base_path: &Path,
    errors: &mut ContentConfigErrors,
) -> Option<ContentSourceConfigValidated> {
    let field_prefix = format!("content_sources.{name}");

    if source.path.is_empty() {
        errors.push(ContentConfigError::Validation {
            field: format!("{field_prefix}.path"),
            message: "Source path is required".to_string(),
            suggestion: Some("Add a path to the content directory".to_string()),
        });
        return None;
    }

    if source.source_id.as_str().is_empty() {
        errors.push(ContentConfigError::Validation {
            field: format!("{field_prefix}.source_id"),
            message: "source_id is required".to_string(),
            suggestion: Some("Add a unique source_id".to_string()),
        });
        return None;
    }

    if source.category_id.as_str().is_empty() {
        errors.push(ContentConfigError::Validation {
            field: format!("{field_prefix}.category_id"),
            message: "category_id is required".to_string(),
            suggestion: Some("Add a category_id that references a defined category".to_string()),
        });
        return None;
    }

    if !categories.contains_key(source.category_id.as_str()) {
        errors.push(ContentConfigError::Validation {
            field: format!("{field_prefix}.category_id"),
            message: format!("Referenced category '{}' not found", source.category_id),
            suggestion: Some("Add this category to the categories section".to_string()),
        });
    }

    let resolved_path = if source.path.starts_with('/') {
        PathBuf::from(&source.path)
    } else {
        base_path.join(&source.path)
    };

    let Ok(canonical_path) = std::fs::canonicalize(&resolved_path) else {
        errors.push(ContentConfigError::Validation {
            field: format!("{field_prefix}.path"),
            message: "Content source directory does not exist".to_string(),
            suggestion: Some("Create the directory or fix the path".to_string()),
        });
        return None;
    };

    Some(ContentSourceConfigValidated {
        path: canonical_path,
        source_id: source.source_id.clone(),
        category_id: source.category_id.clone(),
        enabled: source.enabled,
        description: source.description.clone(),
        allowed_content_types: source.allowed_content_types.clone(),
        indexing: source.indexing.unwrap_or(IndexingConfig {
            clear_before: false,
            recursive: false,
            override_existing: false,
        }),
        sitemap: source.sitemap.clone(),
        branding: source.branding.clone(),
    })
}