riley_cms_core/
lib.rs

1//! # riley-cms-core
2//!
3//! Core library for riley_cms - a minimal, self-hosted headless CMS.
4//!
5//! This crate provides the domain logic for riley_cms without any HTTP or CLI concerns.
6//! It can be embedded in other Rust applications or used standalone.
7//!
8//! ## Features
9//!
10//! - **Content Management**: Parse and query posts and series from a Git-based content directory
11//! - **S3/R2 Storage**: Upload and list assets from S3-compatible storage
12//! - **In-Memory Caching**: Fast content access with cache refresh on demand
13//! - **Visibility Control**: Support for drafts, scheduled posts, and live content
14//!
15//! ## Quick Start
16//!
17//! ```ignore
18//! use riley_cms_core::{RileyCms, resolve_config, ListOptions};
19//!
20//! #[tokio::main]
21//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
22//!     // Load config from standard locations
23//!     let config = resolve_config(None)?;
24//!
25//!     // Create the RileyCms instance
26//!     let riley_cms = RileyCms::from_config(config).await?;
27//!
28//!     // List all live posts
29//!     let posts = riley_cms.list_posts(&ListOptions::default()).await?;
30//!     for post in posts.items {
31//!         println!("{}: {}", post.slug, post.title);
32//!     }
33//!
34//!     // Get a specific post
35//!     if let Some(post) = riley_cms.get_post("my-post").await? {
36//!         println!("Content: {}", post.content);
37//!     }
38//!
39//!     Ok(())
40//! }
41//! ```
42//!
43//! ## Content Structure
44//!
45//! riley_cms expects content in this structure:
46//!
47//! ```text
48//! content/
49//! ├── my-post/
50//! │   ├── config.toml
51//! │   └── content.mdx
52//! └── my-series/
53//!     ├── series.toml
54//!     ├── part-one/
55//!     │   ├── config.toml
56//!     │   └── content.mdx
57//!     └── part-two/
58//!         ├── config.toml
59//!         └── content.mdx
60//! ```
61//!
62//! ## Visibility Model
63//!
64//! Content visibility is controlled by the `goes_live_at` field:
65//!
66//! - `None` → Draft (only visible with `include_drafts`)
67//! - `Some(past_date)` → Live (always visible)
68//! - `Some(future_date)` → Scheduled (only visible with `include_scheduled`)
69
70mod config;
71mod content;
72mod error;
73pub mod git;
74mod security;
75mod storage;
76mod types;
77
78pub use config::{Config, GitConfig, RileyCmsConfig, resolve_config};
79pub use content::ContentCache;
80pub use error::{Error, Result};
81pub use git::{BodyStream, GitBackend, GitCgiCompletion, GitCgiHeaders, GitCgiStreamResponse};
82pub use storage::Storage;
83pub use types::*;
84
85use chrono::Utc;
86use hmac::{Hmac, Mac};
87use sha2::Sha256;
88use std::net::ToSocketAddrs;
89use std::path::Path;
90use std::sync::Arc;
91use tokio::sync::RwLock;
92
/// Main entry point for riley_cms functionality.
///
/// `RileyCms` provides access to all CMS operations: listing posts and series,
/// retrieving individual content, managing assets, and cache control.
///
/// # Example
///
/// ```ignore
/// use riley_cms_core::{RileyCms, RileyCmsConfig, ListOptions};
///
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
/// let config: RileyCmsConfig = toml::from_str(r#"
/// [content]
/// repo_path = "/data/content"
/// [storage]
/// bucket = "my-assets"
/// public_url_base = "https://assets.example.com"
/// "#)?;
///
/// let riley_cms = RileyCms::from_config(config).await?;
/// let posts = riley_cms.list_posts(&ListOptions::default()).await?;
/// # Ok(())
/// # }
/// ```
pub struct RileyCms {
    /// Configuration this instance was built from; exposed read-only via `config()`.
    config: RileyCmsConfig,
    /// In-memory content cache. Read accessors take the read lock; `refresh()`
    /// takes the write lock only to swap in a freshly loaded cache.
    cache: Arc<RwLock<ContentCache>>,
    /// Storage client used by the asset listing and upload methods.
    storage: Storage,
}
122
123impl RileyCms {
124    /// Create a new RileyCms instance from configuration.
125    ///
126    /// This loads content from disk into an in-memory cache and initializes
127    /// the S3 storage client.
128    ///
129    /// # Errors
130    ///
131    /// Returns an error if content cannot be loaded or S3 configuration is invalid.
132    pub async fn from_config(config: RileyCmsConfig) -> Result<Self> {
133        let storage = Storage::new(&config.storage).await?;
134
135        // Clone content config to move into the blocking task closure
136        let content_config = config.content.clone();
137
138        // Offload blocking filesystem I/O to a dedicated thread pool
139        let cache = tokio::task::spawn_blocking(move || ContentCache::load(&content_config))
140            .await
141            .map_err(|e| Error::Io(std::io::Error::other(e)))??;
142
143        Ok(Self {
144            config,
145            cache: Arc::new(RwLock::new(cache)),
146            storage,
147        })
148    }
149
150    /// List posts with filtering and pagination.
151    ///
152    /// By default, only live posts (with `goes_live_at` in the past) are returned.
153    /// Use [`ListOptions`] to include drafts or scheduled posts.
154    ///
155    /// Posts are sorted by `goes_live_at` descending (newest first).
156    pub async fn list_posts(&self, opts: &ListOptions) -> Result<ListResult<PostSummary>> {
157        let cache = self.cache.read().await;
158        cache.list_posts(opts)
159    }
160
161    /// Get a single post by its slug.
162    ///
163    /// Returns `None` if no post with the given slug exists.
164    /// Note: This returns the post regardless of visibility status.
165    pub async fn get_post(&self, slug: &str) -> Result<Option<Post>> {
166        let cache = self.cache.read().await;
167        cache.get_post(slug)
168    }
169
170    /// List series with filtering and pagination.
171    ///
172    /// By default, only live series are returned.
173    /// Series are sorted by `goes_live_at` descending.
174    pub async fn list_series(&self, opts: &ListOptions) -> Result<ListResult<SeriesSummary>> {
175        let cache = self.cache.read().await;
176        cache.list_series(opts)
177    }
178
179    /// Get a single series by its slug, including all posts.
180    ///
181    /// Posts within the series are sorted by their `order` field,
182    /// with alphabetical fallback for ties or missing values.
183    pub async fn get_series(&self, slug: &str) -> Result<Option<Series>> {
184        let cache = self.cache.read().await;
185        cache.get_series(slug)
186    }
187
188    /// Validate content structure and return any errors.
189    ///
190    /// Checks for common issues like empty titles, missing content, etc.
191    pub async fn validate_content(&self) -> Result<Vec<ValidationError>> {
192        let cache = self.cache.read().await;
193        Ok(cache.validate())
194    }
195
196    /// List assets in the S3/R2 storage bucket with pagination.
197    ///
198    /// Uses cursor-based pagination via S3 continuation tokens.
199    /// Defaults to 100 assets per page, capped at 1000.
200    pub async fn list_assets(&self, opts: &AssetListOptions) -> Result<AssetListResult> {
201        self.storage.list_assets(opts).await
202    }
203
204    /// Upload a file to the storage bucket.
205    ///
206    /// # Arguments
207    ///
208    /// * `path` - Local file path to upload
209    /// * `dest` - Optional destination path in bucket (defaults to filename)
210    pub async fn upload_asset(&self, path: &Path, dest: Option<&str>) -> Result<Asset> {
211        self.storage.upload_asset(path, dest).await
212    }
213
214    /// Refresh the content cache from disk.
215    ///
216    /// Call this after content has been updated (e.g., after a git push)
217    /// to reload the in-memory cache.
218    pub async fn refresh(&self) -> Result<()> {
219        // Clone the config to move into the blocking task closure
220        let content_config = self.config.content.clone();
221
222        // Offload blocking filesystem I/O to a dedicated thread pool
223        let new_cache = tokio::task::spawn_blocking(move || ContentCache::load(&content_config))
224            .await
225            .map_err(|e| Error::Io(std::io::Error::other(e)))??;
226
227        let mut cache = self.cache.write().await;
228        *cache = new_cache;
229        Ok(())
230    }
231
232    /// Get an ETag representing the current content state.
233    ///
234    /// This is a hash of all content, suitable for HTTP caching headers.
235    /// The ETag changes when any content is modified.
236    pub async fn content_etag(&self) -> String {
237        let cache = self.cache.read().await;
238        cache.etag()
239    }
240
241    /// Fire webhooks after content update.
242    ///
243    /// Each webhook is validated and sent atomically: DNS is resolved once,
244    /// checked against private/internal IP ranges, and the connection is pinned
245    /// to the validated IP (preventing DNS rebinding/TOCTOU attacks).
246    ///
247    /// If a `secret` is configured in `[webhooks]`, signs each request body with
248    /// HMAC-SHA256 and includes the hex signature in the `X-Riley-Cms-Signature` header.
249    /// Retries up to 3 times with exponential backoff on network errors or 5xx responses.
250    pub async fn fire_webhooks(&self) {
251        if let Some(ref webhooks) = self.config.webhooks {
252            // Resolve webhook secret once (if configured)
253            let secret = if let Some(ref secret_config) = webhooks.secret {
254                match secret_config.resolve() {
255                    Ok(v) if v.is_empty() => {
256                        tracing::error!(
257                            "Webhook secret resolves to empty string. Skipping webhook delivery."
258                        );
259                        return;
260                    }
261                    Ok(v) => Some(v),
262                    Err(e) => {
263                        tracing::error!(
264                            "Failed to resolve webhook secret: {}. Skipping webhook delivery.",
265                            e
266                        );
267                        return;
268                    }
269                }
270            } else {
271                None
272            };
273
274            for url in &webhooks.on_content_update {
275                let url = url.clone();
276                let secret = secret.clone();
277                tokio::spawn(async move {
278                    send_webhook(&url, secret.as_deref()).await;
279                });
280            }
281        }
282    }
283
284    /// Get a reference to the config.
285    pub fn config(&self) -> &RileyCmsConfig {
286        &self.config
287    }
288}
289
/// Maximum number of delivery attempts per webhook.
///
/// Despite the name, this is the total attempt count (the initial request
/// plus any retries): `send_webhook` loops over `0..WEBHOOK_MAX_RETRIES`.
const WEBHOOK_MAX_RETRIES: u32 = 3;
292
293/// Send a single webhook with SSRF protection, optional HMAC signing, and retry.
294///
295/// Resolves DNS once, validates all IPs against private ranges, then pins the
296/// connection to the validated IP using `reqwest::ClientBuilder::resolve()`.
297/// This prevents DNS rebinding (TOCTOU) attacks where DNS changes between
298/// validation and the actual connection.
299///
300/// Retries on network errors or 5xx responses. Does not retry on 4xx (client errors)
301/// since those indicate a problem with the receiver's configuration, not a transient issue.
302async fn send_webhook(url: &str, secret: Option<&str>) {
303    // 1. Parse URL and validate scheme
304    let parsed = match reqwest::Url::parse(url) {
305        Ok(u) => u,
306        Err(e) => {
307            tracing::warn!("Skipping webhook {}: invalid URL: {}", url, e);
308            return;
309        }
310    };
311
312    let scheme = parsed.scheme();
313    if scheme != "http" && scheme != "https" {
314        tracing::warn!("Skipping webhook {}: unsupported scheme: {}", url, scheme);
315        return;
316    }
317
318    let host = match parsed.host_str() {
319        Some(h) => h.to_string(),
320        None => {
321            tracing::warn!("Skipping webhook {}: missing host", url);
322            return;
323        }
324    };
325    let port = parsed.port_or_known_default().unwrap_or(443);
326
327    // 2. Resolve DNS once and validate all IPs
328    let addr_str = format!("{}:{}", host, port);
329    let addrs: Vec<std::net::SocketAddr> = match addr_str.to_socket_addrs() {
330        Ok(a) => a.collect(),
331        Err(e) => {
332            tracing::warn!("Skipping webhook {}: DNS resolution failed: {}", url, e);
333            return;
334        }
335    };
336
337    // 3. Find a safe (non-private) IP address to connect to
338    let safe_addr = match addrs.into_iter().find(|a| security::is_safe_ip(&a.ip())) {
339        Some(a) => a,
340        None => {
341            tracing::warn!(
342                "Skipping webhook {}: all resolved IPs are private/internal",
343                url
344            );
345            return;
346        }
347    };
348
349    // 4. Build client pinned to the validated IP (prevents DNS rebinding)
350    //    Redirects disabled to prevent SSRF bypass via 302 to internal IPs.
351    let client = reqwest::Client::builder()
352        .resolve(&host, safe_addr)
353        .redirect(reqwest::redirect::Policy::none())
354        .timeout(std::time::Duration::from_secs(10))
355        .build()
356        .unwrap_or_else(|_| reqwest::Client::new());
357
358    // Include a timestamp in the payload to prevent replay attacks.
359    // Each webhook delivery gets a unique signature since the body changes.
360    let body = serde_json::json!({
361        "event": "content_update",
362        "timestamp": Utc::now().timestamp()
363    })
364    .to_string();
365
366    // Compute HMAC signature if secret is configured
367    let signature = match secret {
368        Some(s) => {
369            let mut mac = match Hmac::<Sha256>::new_from_slice(s.as_bytes()) {
370                Ok(m) => m,
371                Err(e) => {
372                    tracing::error!("Invalid webhook secret key: {}. Skipping webhook.", e);
373                    return;
374                }
375            };
376            mac.update(body.as_bytes());
377            Some(hex::encode(mac.finalize().into_bytes()))
378        }
379        None => None,
380    };
381
382    for attempt in 0..WEBHOOK_MAX_RETRIES {
383        let mut request = client
384            .post(url)
385            .header("Content-Type", "application/json")
386            .body(body.clone());
387
388        if let Some(ref sig) = signature {
389            request = request.header("X-Riley-Cms-Signature", format!("sha256={}", sig));
390        }
391
392        match request.send().await {
393            Ok(response) if response.status().is_success() => return,
394            Ok(response) if response.status().is_client_error() => {
395                tracing::warn!(
396                    "Webhook {} returned {} (not retrying)",
397                    url,
398                    response.status()
399                );
400                return;
401            }
402            Ok(response) => {
403                tracing::warn!(
404                    "Webhook {} returned {} (attempt {}/{})",
405                    url,
406                    response.status(),
407                    attempt + 1,
408                    WEBHOOK_MAX_RETRIES
409                );
410            }
411            Err(e) => {
412                tracing::warn!(
413                    "Webhook {} failed: {} (attempt {}/{})",
414                    url,
415                    e,
416                    attempt + 1,
417                    WEBHOOK_MAX_RETRIES
418                );
419            }
420        }
421
422        // Exponential backoff: 1s, 2s, 4s
423        if attempt < WEBHOOK_MAX_RETRIES - 1 {
424            tokio::time::sleep(std::time::Duration::from_secs(1 << attempt)).await;
425        }
426    }
427
428    tracing::error!(
429        "Webhook {} failed after {} attempts",
430        url,
431        WEBHOOK_MAX_RETRIES
432    );
433}